1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
45 #define _(msgid) gettext (msgid)
47 #include "debug-print.h"
49 /* System file reader. */
/* Reader state for one open system file (the struct header line is
   not visible in this listing).  sfm_open_reader and read_header
   initialise most of the fields below. */
52 struct file_handle *fh; /* File handle. */
53 FILE *file; /* File stream; sfm_open_reader opens it with fn_open in "rb" mode. */
55 int reverse_endian; /* 1=file has endianness opposite us. */
56 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
57 int value_cnt; /* Number of `union value's per case; read_variables treats -1 as "count unknown". */
58 long case_cnt; /* Number of cases, -1 if unknown. */
59 int compressed; /* 1=compressed, 0=not compressed. */
60 double bias; /* Compression bias, usually 100.0. */
61 int weight_idx; /* 0-based index of weighting variable, or -1 (read_header stores the header value minus 1). */
64 struct sfm_var *vars; /* Variables; read_variables allocates one entry per case position. */
66 /* File's special constants. */
71 /* Decompression buffer.  All three pointers start out NULL. */
72 flt64 *buf; /* Buffer data. */
73 flt64 *ptr; /* Current location in buffer. */
74 flt64 *end; /* End of buffer data. */
76 /* Compression instruction octet. */
77 unsigned char x[8]; /* Current instruction octet. */
78 unsigned char *y; /* Location in current instruction octet; starts at x + sizeof x, i.e. octet exhausted. */
81 /* A variable in a system file.  read_variables fills in one of these
   per case position. */
84 int width; /* 0=numeric, otherwise string width; read_variables stores -1 for a long string continuation position. */
85 int fv; /* Index into case. */
90 /* Swap bytes *A and *B.  Building block for bswap_int32 and
   bswap_flt64, which call it on mirrored byte positions. */
92 bswap (unsigned char *a, unsigned char *b)
99 /* Reverse the byte order of 32-bit integer *X, in place. */
101 bswap_int32 (int32 *x_)
/* Treat the integer as four raw bytes. */
103 unsigned char *x = (unsigned char *) x_;
/* Exchange the outer pair, then the inner pair. */
104 bswap (x + 0, x + 3);
105 bswap (x + 1, x + 2);
108 /* Reverse the byte order of 64-bit floating point *X, in place. */
110 bswap_flt64 (flt64 *x_)
/* Treat the value as eight raw bytes. */
112 unsigned char *x = (unsigned char *) x_;
/* Exchange mirrored byte positions, working from the ends inward. */
113 bswap (x + 0, x + 7);
114 bswap (x + 1, x + 6);
115 bswap (x + 2, x + 5);
116 bswap (x + 3, x + 4);
/* Forward declaration of corrupt_msg.  PRINTF_FORMAT (2, 3) presumably
   marks FORMAT (argument 2) as a printf-style format whose arguments
   start at position 3 -- TODO confirm against the macro's definition. */
120 corrupt_msg (int class, const char *format,...)
121 PRINTF_FORMAT (2, 3);
123 /* Displays a corrupt sysfile error.  FORMAT and the variadic arguments
   are passed to err_vmsg together with an error record whose title is
   "corrupt system file: " and whose location comes from getl_location. */
125 corrupt_msg (int class, const char *format,...)
/* Record the current source location in the error record. */
131 getl_location (&e.where.filename, &e.where.line_number);
132 e.title = _("corrupt system file: ");
134 va_start (args, format);
135 err_vmsg (&e, format, args);
139 /* Closes a system file after we're done with it.  Releases the file
   handle and closes the underlying stream of reader R. */
141 sfm_close_reader (struct sfm_reader *r)
/* Same type and mode strings that sfm_open_reader passes to fh_open. */
147 fh_close (r->fh, "system file", "rs");
/* A failed close is reported with the system error text. */
150 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
151 msg (ME, _("%s: Closing system file: %s."),
152 handle_get_filename (r->fh), strerror (errno));
160 /* Dictionary reader. */
/* Forward declarations of the static helpers defined later in this
   file.  The int-returning readers are tested with `!' by their
   callers, so a zero return is treated as failure. */
162 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
164 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
167 static int read_header (struct sfm_reader *,
168 struct dictionary *, struct sfm_read_info *);
169 static int parse_format_spec (struct sfm_reader *, int32,
170 struct fmt_spec *, struct variable *);
171 static int read_value_labels (struct sfm_reader *, struct dictionary *,
172 struct variable **var_by_idx);
173 static int read_variables (struct sfm_reader *,
174 struct dictionary *, struct variable ***var_by_idx);
175 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
176 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
177 static int read_documents (struct sfm_reader *, struct dictionary *);
179 static int fread_ok (struct sfm_reader *, void *, size_t);
181 /* Displays the message X with corrupt_msg, then jumps to the error
189 /* Calls buf_read with the specified arguments, and jumps to
190 error if the read fails. */
191 #define assertive_buf_read(a,b,c,d) \
193 if (!buf_read (a,b,c,d)) \
197 /* Opens the system file designated by file handle FH for
198 reading. Reads the system file's dictionary into *DICT.
199 If INFO is non-null, then it receives additional info about the
202 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
203 struct sfm_read_info *info)
205 struct sfm_reader *r = NULL;
206 struct variable **var_by_idx = NULL;
208 *dict = dict_create ();
209 if (!fh_open (fh, "system file", "rs"))
212 /* Create and initialize reader. */
213 r = xmalloc (sizeof *r);
215 r->file = fn_open (handle_get_filename (fh), "rb");
217 r->reverse_endian = 0;
227 r->sysmis = -FLT64_MAX;
228 r->highest = FLT64_MAX;
229 r->lowest = second_lowest_flt64;
231 r->buf = r->ptr = r->end = NULL;
/* Start with the instruction octet exhausted. */
232 r->y = r->x + sizeof r->x;
234 /* Check that file open succeeded. */
237 msg (ME, _("An error occurred while opening \"%s\" for reading "
238 "as a system file: %s."),
239 handle_get_filename (r->fh), strerror (errno));
244 /* Read header and variables. */
245 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
249 /* Handle weighting. */
250 if (r->weight_idx != -1)
252 struct variable *weight_var;
254 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
255 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
256 "and number of elements per case (%d)."),
257 handle_get_filename (r->fh), r->weight_idx, r->value_cnt));
/* var_by_idx holds NULL at long string continuation positions. */
260 weight_var = var_by_idx[r->weight_idx];
262 if (weight_var == NULL)
264 _("%s: Weighting variable may not be a continuation of "
265 "a long string variable."), handle_get_filename (fh)));
266 else if (weight_var->type == ALPHA)
267 lose ((ME, _("%s: Weighting variable may not be a string variable."),
268 handle_get_filename (fh)));
270 dict_set_weight (*dict, weight_var);
273 dict_set_weight (*dict, NULL);
275 /* Read records of types 3, 4, 6, and 7. */
280 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
281 if (r->reverse_endian)
282 bswap_int32 (&rec_type);
287 if (!read_value_labels (r, *dict, var_by_idx))
292 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
293 "records must always immediately follow type 3 "
295 handle_get_filename (r->fh)));
298 if (!read_documents (r, *dict))
315 assertive_buf_read (r, &data, sizeof data, 0);
316 if (r->reverse_endian)
318 bswap_int32 (&data.subtype);
319 bswap_int32 (&data.size);
320 bswap_int32 (&data.count);
/* Total payload size of the type 7 record, with a wrap-around check. */
322 bytes = data.size * data.count;
323 if (bytes < data.size || bytes < data.count)
324 lose ((ME, "%s: Record type %d subtype %d too large.",
325 handle_get_filename (r->fh), rec_type, data.subtype));
327 switch (data.subtype)
330 if (!read_machine_int32_info (r, data.size, data.count))
335 if (!read_machine_flt64_info (r, data.size, data.count))
340 case 6: /* ?? Used by SPSS 8.0. */
344 case 11: /* Variable display parameters */
/* The record carries three values per variable. */
346 const int n_vars = data.count / 3 ;
348 if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
350 msg (MW, _("%s: Invalid subrecord length. "
351 "Record: 7; Subrecord: 11"),
352 handle_get_filename (r->fh));
356 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
/* Read one (measure, width, align) triple and apply it to variable i. */
368 assertive_buf_read (r, &params, sizeof(params), 0);
370 v = dict_get_var(*dict, i);
372 v->measure = params.measure;
373 v->display_width = params.width;
374 v->alignment = params.align;
379 case 13: /* SPSS 12.0 Long variable name map */
381 char *buf, *short_name, *save_ptr;
/* One extra byte beyond the record payload, presumably for a
   terminating null before tokenizing -- TODO confirm. */
385 buf = xmalloc (bytes + 1);
386 if (!buf_read (r, buf, bytes, 0))
/* The map is parsed as SHORT=LONG pairs separated by tabs. */
394 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
396 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
398 char *long_name = strtok_r (NULL, "\t", &save_ptr);
401 /* Validate long name. */
402 if (long_name == NULL)
404 msg (MW, _("%s: Trailing garbage in long variable "
406 handle_get_filename (r->fh));
409 if (!var_is_valid_name (long_name, false))
411 msg (MW, _("%s: Long variable mapping to invalid "
412 "variable name `%s'."),
413 handle_get_filename (r->fh), long_name);
417 /* Find variable using short name. */
418 v = dict_lookup_var (*dict, short_name);
421 msg (MW, _("%s: Long variable mapping for "
422 "nonexistent variable %s."),
423 handle_get_filename (r->fh), short_name);
427 /* Identify any duplicates. */
428 if ( compare_var_names(short_name, long_name, 0) &&
429 NULL != dict_lookup_var (*dict, long_name))
431 lose ((ME, _("%s: Duplicate long variable name `%s' "
432 "within system file."),
433 handle_get_filename (r->fh), long_name));
438 Renaming a variable may clear the short
439 name, but we want to retain it, so
440 re-set it explicitly. */
441 dict_rename_var (*dict, v, long_name);
442 var_set_short_name (v, short_name);
444 /* For compatibility, make sure dictionary
445 is in long variable name map order. In
446 the common case, this has no effect,
447 because the dictionary and the long
448 variable name map are already in the
450 dict_reorder_var (*dict, v, idx);
459 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
460 "encountered in system file."),
461 handle_get_filename (r->fh), data.subtype);
467 void *x = buf_read (r, NULL, data.size * data.count, 0);
479 assertive_buf_read (r, &filler, sizeof filler, 0);
484 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
485 handle_get_filename (r->fh), rec_type);
490 /* Come here on successful completion. */
495 /* Come here on unsuccessful completion. */
496 sfm_close_reader (r);
500 dict_destroy (*dict);
506 /* Read record type 7, subtype 3.  SIZE and COUNT are the element size
   and element count taken from the record header; the record must hold
   exactly eight int32 values.  Elements 6 and 7 are checked below as the
   endianness and character representation codes. */
508 read_machine_int32_info (struct sfm_reader *r, int size, int count)
515 if (size != sizeof (int32) || count != 8)
516 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
517 "subtype 3. Expected size %d, count 8."),
/* sizeof yields a size_t, so cast it to match the %d conversion. */
518 handle_get_filename (r->fh), size, count, (int) sizeof (int32)));
520 assertive_buf_read (r, data, sizeof data, 0);
521 if (r->reverse_endian)
522 for (i = 0; i < 8; i++)
523 bswap_int32 (&data[i]);
527 lose ((ME, _("%s: Floating-point representation in system file is not "
528 "IEEE-754. PSPP cannot convert between floating-point "
530 handle_get_filename (r->fh)));
532 #error Add support for your floating-point format.
535 #ifdef WORDS_BIGENDIAN
540 if (r->reverse_endian)
/* data[6] == 1 is reported as big-endian and 2 as little-endian; it
   must agree with the byte order deduced from the file header. */
542 if (file_bigendian ^ (data[6] == 1))
543 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
544 "endianness intuited from file header (%s)."),
545 handle_get_filename (r->fh),
546 file_bigendian ? _("big-endian") : _("little-endian"),
547 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
550 /* PORTME: Character representation code.  Only codes 2 and 3 are accepted. */
551 if (data[7] != 2 && data[7] != 3)
552 lose ((ME, _("%s: File-indicated character representation code (%s) is "
554 handle_get_filename (r->fh),
555 (data[7] == 1 ? "EBCDIC"
556 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
564 /* Read record type 7, subtype 4.  SIZE and COUNT are the element size
   and element count taken from the record header; the record must hold
   exactly three flt64 values, which are compared below against SYSMIS,
   FLT64_MAX and second_lowest_flt64. */
566 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
571 if (size != sizeof (flt64) || count != 3)
572 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
/* The expected count is 3, matching the check above; sizeof yields a
   size_t, so cast it to match the %d conversion. */
573 "subtype 4. Expected size %d, count 3."),
574 handle_get_filename (r->fh), size, count, (int) sizeof (flt64)));
576 assertive_buf_read (r, data, sizeof data, 0);
577 if (r->reverse_endian)
578 for (i = 0; i < 3; i++)
579 bswap_flt64 (&data[i]);
/* Any difference from this build's special values is recorded in R
   and reported as a warning. */
581 if (data[0] != SYSMIS || data[1] != FLT64_MAX
582 || data[2] != second_lowest_flt64)
585 r->highest = data[1];
587 msg (MW, _("%s: File-indicated value is different from internal value "
588 "for at least one of the three system values. SYSMIS: "
589 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
591 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
592 (double) data[1], (double) FLT64_MAX,
593 (double) data[2], (double) second_lowest_flt64);
/* Reads and validates the system file header for reader R.  Checks the
   "$FL2" magic, deduces the byte order from the layout code, and copies
   the case size, compression flag, weight index and case count into R.
   A file label that is not all spaces or nulls is stored in DICT.
   Creation date and time, endianness, compression flag, case count and
   product name are copied into INFO.
   NOTE(review): the guard around the INFO assignments is not visible in
   this listing. */
603 read_header (struct sfm_reader *r,
604 struct dictionary *dict, struct sfm_read_info *info)
606 struct sysfile_header hdr; /* Disk buffer. */
607 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
608 int skip_amt = 0; /* Amount of product name to omit. */
611 /* Read header, check magic. */
612 assertive_buf_read (r, &hdr, sizeof hdr, 0);
613 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
614 lose ((ME, _("%s: Bad magic. Proper system files begin with "
615 "the four characters `$FL2'. This file will not be read."),
616 handle_get_filename (r->fh)));
618 /* Check eye-catcher string. */
619 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
/* NOTE(review): the literal 60 here and below presumably equals
   sizeof hdr.prod_name -- TODO confirm against struct sysfile_header. */
620 for (i = 0; i < 60; i++)
621 if (!isprint ((unsigned char) prod_name[i]))
/* Scan backward from the end for the last graphic character. */
623 for (i = 59; i >= 0; i--)
624 if (!isgraph ((unsigned char) prod_name[i]))
629 prod_name[60] = '\0';
/* A recognised leading prefix is skipped when the product name is
   copied to INFO further down. */
633 static const char *prefix[N_PREFIXES] =
635 "@(#) SPSS DATA FILE",
641 for (i = 0; i < N_PREFIXES; i++)
642 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
644 skip_amt = strlen (prefix[i]);
649 /* Check endianness.  A layout code that reads as 2 directly means the
   file is in our byte order; otherwise it must read as 2 after swapping. */
650 if (hdr.layout_code == 2)
651 r->reverse_endian = 0;
654 bswap_int32 (&hdr.layout_code);
655 if (hdr.layout_code != 2)
656 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
657 "should be 2, in big-endian or little-endian format."),
658 handle_get_filename (r->fh), hdr.layout_code));
/* Opposite byte order: convert the remaining numeric header fields. */
660 r->reverse_endian = 1;
661 bswap_int32 (&hdr.case_size);
662 bswap_int32 (&hdr.compress);
663 bswap_int32 (&hdr.weight_idx);
664 bswap_int32 (&hdr.case_cnt);
665 bswap_flt64 (&hdr.bias);
669 /* Copy basic info and verify correctness. */
670 r->value_cnt = hdr.case_size;
672 /* If value count is ridiculous, then force it to -1 (a sentinel value) */
673 if ( r->value_cnt < 0 ||
674 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
677 r->compressed = hdr.compress;
/* The header's weight index is 1-based; R keeps it 0-based, so a header
   value of 0 becomes -1. */
679 r->weight_idx = hdr.weight_idx - 1;
681 r->case_cnt = hdr.case_cnt;
682 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
684 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
685 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
/* An unusual bias only produces a warning. */
688 if (r->bias != 100.0)
689 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
691 handle_get_filename (r->fh), r->bias);
693 /* Make a file label only on the condition that the given label is
694 not all spaces or nulls. */
/* Scan backward for the last character that is neither space nor null;
   the label is the text up to and including it. */
698 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
699 if (!isspace ((unsigned char) hdr.file_label[i])
700 && hdr.file_label[i] != 0)
702 char *label = xmalloc (i + 2);
703 memcpy (label, hdr.file_label, i + 1);
705 dict_set_label (dict, label);
/* Copy the fixed-width date and time fields and null-terminate them. */
715 memcpy (info->creation_date, hdr.creation_date, 9);
716 info->creation_date[9] = 0;
718 memcpy (info->creation_time, hdr.creation_time, 8);
719 info->creation_time[8] = 0;
721 #ifdef WORDS_BIGENDIAN
722 info->big_endian = !r->reverse_endian;
724 info->big_endian = r->reverse_endian;
727 info->compressed = hdr.compress;
729 info->case_cnt = hdr.case_cnt;
/* Copy the product name starting at the first graphic character after
   the skipped prefix. */
731 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
732 if (isgraph ((unsigned char) *cp))
734 strcpy (info->product, cp);
743 /* Reads most of the dictionary from reader R's file; also fills in the
744 associated VAR_BY_IDX array, which gets one slot per case position and
   holds NULL at long string continuation positions.  R->vars is filled
   in parallel with each position's width and case index. */
746 read_variables (struct sfm_reader *r,
747 struct dictionary *dict, struct variable ***var_by_idx)
751 struct sysfile_variable sv; /* Disk buffer. */
752 int long_string_count = 0; /* # of long string continuation
753 records still expected. */
754 int next_value = 0; /* Index to next `value' structure. */
760 /* Pre-allocate variables (only possible when the header's value count
   was accepted). */
761 if ( r->value_cnt != -1 )
763 *var_by_idx = xmalloc(r->value_cnt * sizeof (**var_by_idx));
764 r->vars = xmalloc( r->value_cnt * sizeof (*r->vars) );
768 /* Read in the entry for each variable and use the info to
769 initialize the dictionary. */
773 char name[SHORT_NAME_LEN + 1];
777 if ( r->value_cnt != -1 && i >= r->value_cnt )
780 assertive_buf_read (r, &sv, sizeof sv, 0);
782 if (r->reverse_endian)
784 bswap_int32 (&sv.rec_type);
785 bswap_int32 (&sv.type);
786 bswap_int32 (&sv.has_var_label);
787 bswap_int32 (&sv.n_missing_values);
788 bswap_int32 (&sv.print);
789 bswap_int32 (&sv.write);
792 /* We've come to the end of the variable entries */
793 if (sv.rec_type != 2)
/* Push the record back so that it can be read again as the next record. */
795 buf_unread(r, sizeof sv);
/* Count unknown: grow both arrays by one slot for this record. */
800 if ( -1 == r->value_cnt )
802 *var_by_idx = xrealloc (*var_by_idx, sizeof **var_by_idx * (i + 1));
803 r->vars = xrealloc(r->vars, (i + 1) * sizeof (*r->vars) );
806 /* If there was a long string previously, make sure that the
807 continuations are present; otherwise make sure there aren't
809 if (long_string_count)
812 lose ((ME, _("%s: position %d: String variable does not have "
813 "proper number of continuation records."),
814 handle_get_filename (r->fh), i));
817 r->vars[i].width = -1;
818 (*var_by_idx)[i] = NULL;
822 else if (sv.type == -1)
823 lose ((ME, _("%s: position %d: Superfluous long string continuation "
825 handle_get_filename (r->fh), i));
827 /* Check fields for validity. */
828 if (sv.type < 0 || sv.type > 255)
829 lose ((ME, _("%s: position %d: Bad variable type code %d."),
830 handle_get_filename (r->fh), i, sv.type));
831 if (sv.has_var_label != 0 && sv.has_var_label != 1)
832 lose ((ME, _("%s: position %d: Variable label indicator field is not "
833 "0 or 1."), handle_get_filename (r->fh), i));
834 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
835 || sv.n_missing_values == -1)
836 lose ((ME, _("%s: position %d: Missing value indicator field is not "
837 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
839 /* Copy first character of variable name.  It must be a letter, `@'
   or `#'; lowercase letters and `#' only draw warnings. */
840 if (!isalpha ((unsigned char) sv.name[0])
841 && sv.name[0] != '@' && sv.name[0] != '#')
842 lose ((ME, _("%s: position %d: Variable name begins with invalid "
844 handle_get_filename (r->fh), i));
845 if (islower ((unsigned char) sv.name[0]))
846 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
848 handle_get_filename (r->fh), i, sv.name[0]);
849 if (sv.name[0] == '#')
850 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
851 "(`#'). Scratch variables should not appear in system "
853 handle_get_filename (r->fh), i);
854 name[0] = toupper ((unsigned char) (sv.name[0]));
856 /* Copy remaining characters of variable name, upper-casing any
   lowercase letters with a warning. */
857 for (j = 1; j < SHORT_NAME_LEN; j++)
859 int c = (unsigned char) sv.name[j];
863 else if (islower (c))
865 msg (MW, _("%s: position %d: Variable name character %d is "
866 "lowercase letter %c."),
867 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
868 name[j] = toupper ((unsigned char) (c));
870 else if (isalnum (c) || c == '.' || c == '@'
871 || c == '#' || c == '$' || c == '_')
874 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
876 handle_get_filename (r->fh), i, c, c));
880 if ( ! var_is_valid_name(name, false) )
881 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
882 handle_get_filename (r->fh), name));
884 /* Create variable.  sv.type is passed as the width: 0 for numeric,
   otherwise the string width. */
886 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
888 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
889 handle_get_filename (r->fh), name));
891 var_set_short_name (vv, vv->name);
893 /* Case reading data.  A string of width W occupies
   DIV_RND_UP (W, sizeof (flt64)) case positions; every position after
   the first must arrive as a continuation record. */
894 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
895 long_string_count = nv - 1;
898 /* Get variable label, if any. */
899 if (sv.has_var_label == 1)
904 /* Read length of label. */
905 assertive_buf_read (r, &len, sizeof len, 0);
906 if (r->reverse_endian)
910 if (len < 0 || len > 255)
911 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
913 handle_get_filename (r->fh), vv->name, len));
917 /* Read label into variable structure.  The label is padded in the
   file to a multiple of sizeof (int32); LEN + 1 bytes are requested
   as the minimum allocation so that the terminator fits.
   NOTE(review): when LEN is 0 buf_read is asked for 0 bytes and
   allocates nothing; what it returns then is not visible here, so
   confirm that the store to vv->label[len] is safe. */
918 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
919 if (vv->label == NULL)
921 vv->label[len] = '\0';
925 /* Set missing values.  A positive count gives that many discrete
   values; -2 and -3 describe a range, with -3 adding one discrete value. */
926 if (sv.n_missing_values != 0)
930 if (vv->width > MAX_SHORT_STRING)
931 lose ((ME, _("%s: Long string variable %s may not have missing "
933 handle_get_filename (r->fh), vv->name));
935 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
/* Only numeric missing values are byte-swapped; string bytes are
   kept as stored. */
937 if (r->reverse_endian && vv->type == NUMERIC)
938 for (j = 0; j < abs (sv.n_missing_values); j++)
939 bswap_flt64 (&mv[j]);
941 if (sv.n_missing_values > 0)
943 vv->miss_type = sv.n_missing_values;
944 if (vv->type == NUMERIC)
945 for (j = 0; j < sv.n_missing_values; j++)
946 vv->missing[j].f = mv[j];
948 for (j = 0; j < sv.n_missing_values; j++)
949 memcpy (vv->missing[j].s, &mv[j], vv->width);
955 if (vv->type == ALPHA)
956 lose ((ME, _("%s: String variable %s may not have missing "
957 "values specified as a range."),
958 handle_get_filename (r->fh), vv->name));
/* A range whose low end is the file's LOWEST, or whose high end is
   the file's HIGHEST, is open-ended and keeps only the other bound. */
960 if (mv[0] == r->lowest)
962 vv->miss_type = MISSING_LOW;
963 vv->missing[x++].f = mv[1];
965 else if (mv[1] == r->highest)
967 vv->miss_type = MISSING_HIGH;
968 vv->missing[x++].f = mv[0];
972 vv->miss_type = MISSING_RANGE;
973 vv->missing[x++].f = mv[0];
974 vv->missing[x++].f = mv[1];
977 if (sv.n_missing_values == -3)
980 vv->missing[x++].f = mv[2];
985 vv->miss_type = MISSING_NONE;
987 if (!parse_format_spec (r, sv.print, &vv->print, vv)
988 || !parse_format_spec (r, sv.write, &vv->write, vv))
/* Remember width and case index for the case reader (sfm_read_case). */
991 r->vars[i].width = vv->width;
992 r->vars[i].fv = vv->fv;
996 /* Some consistency checks. */
997 if (long_string_count != 0)
998 lose ((ME, _("%s: Long string continuation records omitted at end of "
1000 handle_get_filename (r->fh)));
/* NOTE(review): if value_cnt is still the -1 sentinel at this point the
   comparison below cannot match and the warning prints -1 -- confirm
   whether value_cnt is updated in the lines not visible here. */
1002 if (next_value != r->value_cnt)
1003 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
1004 "%d were read from file."),
1005 handle_get_filename (r->fh), r->value_cnt, next_value);
/* parse_format_spec: unpacks the packed 32-bit format spec S into F for
   variable V.  Bits 16-23 select the format type via translate_fmt and
   bits 8-15 give the width.  A string format on a numeric variable, or
   the reverse, is an error.  A spec rejected by check_output_specifier
   or check_specifier_width is reported and then replaced by f8_2 for a
   numeric variable, or by an A format of V's width for a string. */
1014 /* Translates the format spec from sysfile format to internal
1017 parse_format_spec (struct sfm_reader *r, int32 s,
1018 struct fmt_spec *f, struct variable *v)
1020 f->type = translate_fmt ((s >> 16) & 0xff);
1022 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1023 handle_get_filename (r->fh), (s >> 16) & 0xff));
1024 f->w = (s >> 8) & 0xff;
1027 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1028 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1029 handle_get_filename (r->fh),
1030 v->type == ALPHA ? _("String") : _("Numeric"),
1032 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1033 formats[f->type].name));
1035 if (!check_output_specifier (f, false)
1036 || !check_specifier_width (f, v->width, false))
1038 msg (ME, _("%s variable %s has invalid format specifier %s."),
1039 v->type == NUMERIC ? _("Numeric") : _("String"),
1040 v->name, fmt_to_string (f));
1041 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1049 /* Reads value labels from reader R's file and inserts them into the
1050 associated dictionary.  A type 3 record (the value/label pairs) must
   be followed at once by a type 4 record listing the variables that the
   labels apply to; VAR_BY_IDX maps the file's 1-based case positions to
   variables. */
1052 read_value_labels (struct sfm_reader *r,
1053 struct dictionary *dict, struct variable **var_by_idx)
1057 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
1058 union value value; /* Value. */
1059 char *label; /* Null-terminated label string. */
1062 struct label *labels = NULL;
1063 int32 n_labels; /* Number of labels. */
1065 struct variable **var = NULL; /* Associated variables. */
1066 int32 n_vars; /* Number of associated variables. */
1070 /* First step: read the contents of the type 3 record and record its
1071 contents. Note that we can't do much with the data since we
1072 don't know yet whether it is of numeric or string type. */
1074 /* Read number of labels. */
1075 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1076 if (r->reverse_endian)
1077 bswap_int32 (&n_labels);
/* Reject a label count too large to allocate for. */
1079 if ( n_labels >= ((int32) ~0) / sizeof *labels)
1081 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1082 handle_get_filename (r->fh), n_labels);
1086 /* Allocate memory. */
1087 labels = xcalloc (n_labels , sizeof *labels);
1088 for (i = 0; i < n_labels; i++)
1089 labels[i].label = NULL;
1091 /* Read each value/label tuple into labels[]. */
1092 for (i = 0; i < n_labels; i++)
1094 struct label *label = labels + i;
1095 unsigned char label_len;
1099 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1101 /* Read label length.  The length byte plus the label text is padded
   to a multiple of sizeof (flt64). */
1102 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1103 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1105 /* Read label, padding.  The length byte has already been consumed,
   hence padded_len - 1 bytes remain. */
1106 label->label = xmalloc (padded_len + 1);
1107 assertive_buf_read (r, label->label, padded_len - 1, 0);
1108 label->label[label_len] = 0;
1111 /* Second step: Read the type 4 record that has the list of
1112 variables to which the value labels are to be applied. */
1114 /* Read record type of type 4 record. */
1118 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1119 if (r->reverse_endian)
1120 bswap_int32 (&rec_type);
1123 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1124 "follow value label record (type 3) as it should."),
1125 handle_get_filename (r->fh)));
1128 /* Read number of variables associated with value label from type 4
1130 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1131 if (r->reverse_endian)
1132 bswap_int32 (&n_vars);
1133 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1134 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1135 "is not between 1 and the number of variables (%d)."),
1136 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1138 /* Read the list of variables. */
1139 var = xmalloc (n_vars * sizeof *var);
1140 for (i = 0; i < n_vars; i++)
1145 /* Read variable index, check range. */
1146 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1147 if (r->reverse_endian)
1148 bswap_int32 (&var_idx);
/* NOTE(review): if r->value_cnt can still be the -1 sentinel here, this
   range check rejects every index -- confirm against read_variables. */
1149 if (var_idx < 1 || var_idx > r->value_cnt)
1150 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1151 "not between 1 and the number of values (%d)."),
1152 handle_get_filename (r->fh), var_idx, r->value_cnt));
1154 /* Make sure it's a real variable.  File indexes are 1-based. */
1155 v = var_by_idx[var_idx - 1];
1157 lose ((ME, _("%s: Variable index associated with value label (%d) "
1158 "refers to a continuation of a string variable, not to "
1159 "an actual variable."),
1160 handle_get_filename (r->fh), var_idx));
1161 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1162 lose ((ME, _("%s: Value labels are not allowed on long string "
1164 handle_get_filename (r->fh), v->name));
1166 /* Add it to the list of variables. */
1170 /* Type check the variables: all must share the type of the first. */
1171 for (i = 1; i < n_vars; i++)
1172 if (var[i]->type != var[0]->type)
1173 lose ((ME, _("%s: Variables associated with value label are not all of "
1174 "identical type. Variable %s has %s type, but variable "
1176 handle_get_filename (r->fh),
1177 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1178 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1180 /* Fill in labels[].value, now that we know the desired type. */
1181 for (i = 0; i < n_labels; i++)
1183 struct label *label = labels + i;
1185 if (var[0]->type == ALPHA)
/* NOTE(review): label->label is a char pointer, so sizeof (label->label)
   is the size of a pointer, not of a buffer.  The intended bound is
   presumably the size of label->value.s -- TODO confirm; on targets
   with 4-byte pointers only 4 of the 8 raw bytes would be copied. */
1187 const int copy_len = min (sizeof (label->raw_value),
1188 sizeof (label->label));
1189 memcpy (label->value.s, label->raw_value, copy_len);
/* Numeric: reinterpret the raw bytes as a flt64. */
1192 assert (sizeof f == sizeof label->raw_value);
1193 memcpy (&f, label->raw_value, sizeof f);
1194 if (r->reverse_endian)
1200 /* Assign the value_label's to each variable. */
1201 for (i = 0; i < n_vars; i++)
1203 struct variable *v = var[i];
1206 /* Add each label to the variable.  A zero return from
   val_labs_replace leads to the duplicate-label warnings below. */
1207 for (j = 0; j < n_labels; j++)
1209 struct label *label = labels + j;
1210 if (!val_labs_replace (v->val_labs, label->value, label->label))
1213 if (var[0]->type == NUMERIC)
1214 msg (MW, _("%s: File contains duplicate label for value %g for "
1216 handle_get_filename (r->fh), label->value.f, v->name);
1218 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1219 "for variable %s."),
1220 handle_get_filename (r->fh), v->width, label->value.s, v->name);
/* The label strings are freed on this path ... */
1224 for (i = 0; i < n_labels; i++)
1225 free (labels[i].label);
/* ... and again here; presumably these are the success and error exits
   (the surrounding labels and returns are not visible). */
1233 for (i = 0; i < n_labels; i++)
1234 free (labels[i].label);
/* buf_read: reads BYTE_CNT bytes from R's file with a single fread.
   When BUF is NULL and BYTE_CNT is positive, a buffer of
   max (BYTE_CNT, MIN_ALLOC) bytes is allocated with xmalloc.  A short
   read is reported either as an I/O error (ferror set) or as an
   unexpected end of file.
   NOTE(review): the BYTE_CNT == 0 branch and the failure path are not
   fully visible here; confirm what is returned for a zero-length read
   with BUF == NULL, and whether a buffer allocated here is freed on
   failure. */
1241 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1242 non-NULL, uses that as the buffer; otherwise allocates at least
1243 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1246 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1250 if (buf == NULL && byte_cnt > 0 )
1251 buf = xmalloc (max (byte_cnt, min_alloc));
1253 if ( byte_cnt == 0 )
1257 if (1 != fread (buf, byte_cnt, 1, r->file))
1259 if (ferror (r->file))
1260 msg (ME, _("%s: Reading system file: %s."),
1261 handle_get_filename (r->fh), strerror (errno));
1263 corrupt_msg (ME, _("%s: Unexpected end of file."),
1264 handle_get_filename (r->fh));
1270 /* Winds the reader BYTE_CNT bytes back in the reader stream.
   BYTE_CNT must be positive.  A failed seek is reported with msg; the
   function returns nothing, so the caller is not told about it. */
1272 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1274 assert(byte_cnt > 0);
/* Convert to long before negating.  Negating the unsigned size_t first
   wraps to a huge positive value and then relies on an
   implementation-defined conversion to the long offset of fseek. */
1276 if ( 0 != fseek(r->file, -(long) byte_cnt, SEEK_CUR))
1278 msg (ME, _("%s: Seeking system file: %s."),
1279 handle_get_filename (r->fh), strerror (errno));
1283 /* Reads a document record, type 6, from system file R, and sets up
1284 the documents and n_documents fields in the associated
1287 read_documents (struct sfm_reader *r, struct dictionary *dict)
1292 if (dict_get_documents (dict) != NULL)
1293 lose ((ME, _("%s: System file contains multiple "
1294 "type 6 (document) records."),
1295 handle_get_filename (r->fh)));
1297 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
/* The count is stored in the file's byte order, so convert it before
   the range check, as is done for every other int32 read in this file. */
if (r->reverse_endian)
bswap_int32 (&line_cnt);
1299 lose ((ME, _("%s: Number of document lines (%ld) "
1300 "must be greater than 0."),
1301 handle_get_filename (r->fh), (long) line_cnt));
/* Each document line is 80 bytes; one extra byte is requested for the
   terminator stored below. */
1303 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1304 /* FIXME? Run through asciify. */
1305 if (documents == NULL)
1307 documents[80 * line_cnt] = '\0';
1308 dict_set_documents (dict, documents);
/* buffer_input: refills the decompression buffer of R.  Reads up to 128
   flt64 elements from R's file into R->buf with one fread, reports a
   stream error with msg, and sets R->end just past the last element
   actually read.
   NOTE(review): the condition guarding the xmalloc of R->buf, the reset
   of R->ptr and the return statements are not visible in this listing. */
1318 /* Reads compressed data into H->BUF and sets other pointers
1319 appropriately. Returns nonzero only if both no errors occur and
1322 buffer_input (struct sfm_reader *r)
1327 r->buf = xmalloc (sizeof *r->buf * 128);
1328 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1329 if (ferror (r->file))
1331 msg (ME, _("%s: Error reading file: %s."),
1332 handle_get_filename (r->fh), strerror (errno));
1336 r->end = &r->buf[amt];
1340 /* Reads a single case consisting of compressed data from the system
1341 file of reader R into the array BUF[], which must have room for
1342 R->value_cnt elements, and returns nonzero only if successful. */
1343 /* Data in system files is compressed in this manner. Data
1344 values are grouped into sets of eight ("octets"). Each value
1345 in an octet has one instruction byte; the eight instruction bytes
   of an octet are output together.
1346 Each instruction byte gives a value for that byte or indicates
1347 that the value can be found following the instructions. */
1349 read_compressed_data (struct sfm_reader *r, flt64 *buf)
/* Resume in the current instruction octet where the previous call
   stopped (R->y). */
1351 const unsigned char *p_end = r->x + sizeof (flt64);
1352 unsigned char *p = r->y;
1354 const flt64 *buf_beg = buf;
1355 const flt64 *buf_end = &buf[r->value_cnt];
/* Process the remaining instruction bytes of the current octet. */
1359 for (; p < p_end; p++){
1363 /* Code 0 is ignored. */
1366 /* Code 252 is end of file. */
1368 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1369 "in partial case."),
1370 handle_get_filename (r->fh)));
1373 /* Code 253 indicates that the value is stored explicitly
1374 following the instruction bytes. */
/* Refill the decompression buffer if it is empty or used up. */
1375 if (r->ptr == NULL || r->ptr >= r->end)
1376 if (!buffer_input (r))
1378 lose ((ME, _("%s: Unexpected end of file."),
1379 handle_get_filename (r->fh)));
1382 memcpy (buf++, r->ptr++, sizeof *buf);
1387 /* Code 254 indicates a string that is all blanks. */
1388 memset (buf++, ' ', sizeof *buf);
1393 /* Code 255 indicates the system-missing value. */
1395 if (r->reverse_endian)
1402 /* Codes 1 through 251 inclusive are taken to indicate a
1403 value of (BYTE - BIAS), where BYTE is the byte's value
1404 and BIAS is the compression bias (generally 100.0). */
1405 *buf = *p - r->bias;
/* NOTE(review): the value above is computed in native byte order; the
   statement under this test is not visible, but it presumably swaps the
   value so that the swap applied later in sfm_read_case restores it. */
1406 if (r->reverse_endian)
1414 /* We have reached the end of this instruction octet. Read
1416 if (r->ptr == NULL || r->ptr >= r->end)
1417 if (!buffer_input (r))
1420 lose ((ME, _("%s: Unexpected end of file."),
1421 handle_get_filename (r->fh)));
1424 memcpy (r->x, r->ptr++, sizeof *buf);
1432 /* We have filled up an entire record. Update state and return
1438 /* We have been unsuccessful at filling a record, either through i/o
1439 error or through an end-of-file indication. Update state and
1440 return unsuccessfully. */
1444 /* Reads one case from READER's file into C. Returns nonzero
1445 only if successful. */
/* NOTE(review): in the fast path, the SYSMIS fix-up tests
   case_num (c, i) but stores through r->vars[i].fv, whereas the
   endianness fix-up just before it uses r->vars[i].fv throughout.
   Confirm that i and fv always coincide here.  The fast path also
   reads sizeof (union value) bytes per value straight from the file,
   which presumably relies on union value and flt64 having the same
   size -- TODO confirm. */
1447 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1449 if (!r->compressed && sizeof (flt64) == sizeof (double))
1451 /* Fast path: external and internal representations are the
1452 same, except possibly for endianness or SYSMIS. Read
1453 directly into the case's buffer, then fix up any minor
1454 details as needed. */
1455 if (!fread_ok (r, case_data_all_rw (c),
1456 sizeof (union value) * r->value_cnt))
1459 /* Fix up endianness if needed.  Only numeric positions (width 0)
   are swapped. */
1460 if (r->reverse_endian)
1464 for (i = 0; i < r->value_cnt; i++)
1465 if (r->vars[i].width == 0)
1466 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1469 /* Fix up SYSMIS values if needed.
1470 I don't think this will ever actually kick in, but it
1472 if (r->sysmis != SYSMIS)
1476 for (i = 0; i < r->value_cnt; i++)
1477 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1478 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1483 /* Slow path: internal and external representations differ.
1484 Read into a bounce buffer, then copy to C. */
1491 bounce_size = sizeof *bounce * r->value_cnt;
1492 bounce = bounce_cur = local_alloc (bounce_size);
/* The bounce buffer is filled either by a raw read or by the
   decompressor (the test that selects between them is not visible). */
1495 read_ok = fread_ok (r, bounce, bounce_size);
1497 read_ok = read_compressed_data (r, bounce);
1500 local_free (bounce);
/* Copy each position from the bounce buffer into the case. */
1504 for (i = 0; i < r->value_cnt; i++)
1506 struct sfm_var *v = &r->vars[i];
/* Numeric value: one flt64, with the file's SYSMIS mapped to ours. */
1510 flt64 f = *bounce_cur++;
1511 if (r->reverse_endian)
1513 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
/* String value: positions with width -1 are continuation slots and
   are skipped, since the whole string is copied at its first position. */
1515 else if (v->width != -1)
1517 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1518 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1522 local_free (bounce);
/* Reads BYTE_CNT bytes from R's file into BUFFER.  On a short read, a
   stream error is reported with the system error text; otherwise, if
   some but not all bytes arrived, a partial record is reported.  A
   short read of zero bytes without a stream error produces no message.
   The return statements are not visible here; callers treat a zero
   result as failure. */
1528 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1530 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
/* Full read. */
1532 if (read_bytes == byte_cnt)
1536 if (ferror (r->file))
1537 msg (ME, _("%s: Reading system file: %s."),
1538 handle_get_filename (r->fh), strerror (errno));
1539 else if (read_bytes != 0)
1540 msg (ME, _("%s: Partial record at end of system file."),
1541 handle_get_filename (r->fh));