1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
44 #include "debug-print.h"
46 /* System file reader. */
49 struct file_handle *fh; /* File handle. */
50 FILE *file; /* File stream. */
52 int reverse_endian; /* 1=file has endianness opposite us. */
53 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
54 int value_cnt; /* Number of `union values's per case. */
55 long case_cnt; /* Number of cases, -1 if unknown. */
56 int compressed; /* 1=compressed, 0=not compressed. */
57 double bias; /* Compression bias, usually 100.0. */
58 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 struct sfm_var *vars; /* Variables. */
62 size_t var_cnt; /* Number of variables. */
64 /* File's special constants. */
69 /* Decompression buffer. */
70 flt64 *buf; /* Buffer data. */
71 flt64 *ptr; /* Current location in buffer. */
72 flt64 *end; /* End of buffer data. */
74 /* Compression instruction octet. */
75 unsigned char x[8]; /* Current instruction octet. */
76 unsigned char *y; /* Location in current instruction octet. */
79 /* A variable in a system file. */
82 int width; /* 0=numeric, otherwise string width. */
83 int fv; /* Index into case. */
88 /* Swap bytes *A and *B. */
90 bswap (unsigned char *a, unsigned char *b)
97 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
99 bswap_int32 (int32 *x_)
101 unsigned char *x = (unsigned char *) x_;
102 bswap (x + 0, x + 3);
103 bswap (x + 1, x + 2);
106 /* Reverse the byte order of 64-bit floating point *X. */
108 bswap_flt64 (flt64 *x_)
110 unsigned char *x = (unsigned char *) x_;
111 bswap (x + 0, x + 7);
112 bswap (x + 1, x + 6);
113 bswap (x + 2, x + 5);
114 bswap (x + 3, x + 4);
118 corrupt_msg (int class, const char *format,...)
119 PRINTF_FORMAT (2, 3);
121 /* Displays a corrupt sysfile error. */
123 corrupt_msg (int class, const char *format,...)
129 getl_location (&e.where.filename, &e.where.line_number);
130 e.title = _("corrupt system file: ");
132 va_start (args, format);
133 err_vmsg (&e, format, args);
137 /* Closes a system file after we're done with it. */
139 sfm_close_reader (struct sfm_reader *r)
145 fh_close (r->fh, "system file", "rs");
148 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
149 msg (ME, _("%s: Closing system file: %s."),
150 handle_get_filename (r->fh), strerror (errno));
158 /* Dictionary reader. */
160 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
163 static int read_header (struct sfm_reader *,
164 struct dictionary *, struct sfm_read_info *);
165 static int parse_format_spec (struct sfm_reader *, int32,
166 struct fmt_spec *, struct variable *);
167 static int read_value_labels (struct sfm_reader *, struct dictionary *,
168 struct variable **var_by_idx);
169 static int read_variables (struct sfm_reader *,
170 struct dictionary *, struct variable ***var_by_idx);
171 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
172 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
173 static int read_documents (struct sfm_reader *, struct dictionary *);
175 static int fread_ok (struct sfm_reader *, void *, size_t);
177 /* Displays the message X with corrupt_msg, then jumps to the error
185 /* Calls buf_read with the specified arguments, and jumps to
186 error if the read fails. */
187 #define assertive_buf_read(a,b,c,d) \
189 if (!buf_read (a,b,c,d)) \
193 /* Opens the system file designated by file handle FH for
194 reading. Reads the system file's dictionary into *DICT.
195 If INFO is non-null, then it receives additional info about the
198 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
199 struct sfm_read_info *info)
201 struct sfm_reader *r = NULL;
202 struct variable **var_by_idx = NULL;
204 *dict = dict_create ();
205 if (!fh_open (fh, "system file", "rs"))
208 /* Create and initialize reader. */
209 r = xmalloc (sizeof *r);
211 r->file = fn_open (handle_get_filename (fh), "rb");
213 r->reverse_endian = 0;
224 r->sysmis = -FLT64_MAX;
225 r->highest = FLT64_MAX;
226 r->lowest = second_lowest_flt64;
228 r->buf = r->ptr = r->end = NULL;
229 r->y = r->x + sizeof r->x;
231 /* Check that file open succeeded. */
234 msg (ME, _("An error occurred while opening \"%s\" for reading "
235 "as a system file: %s."),
236 handle_get_filename (r->fh), strerror (errno));
241 /* Read header and variables. */
242 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
245 /* Handle weighting. */
246 if (r->weight_idx != -1)
248 struct variable *weight_var = var_by_idx[r->weight_idx];
250 if (weight_var == NULL)
252 _("%s: Weighting variable may not be a continuation of "
253 "a long string variable."), handle_get_filename (fh)));
254 else if (weight_var->type == ALPHA)
255 lose ((ME, _("%s: Weighting variable may not be a string variable."),
256 handle_get_filename (fh)));
258 dict_set_weight (*dict, weight_var);
261 dict_set_weight (*dict, NULL);
263 /* Read records of types 3, 4, 6, and 7. */
268 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
269 if (r->reverse_endian)
270 bswap_int32 (&rec_type);
275 if (!read_value_labels (r, *dict, var_by_idx))
280 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
281 "records must always immediately follow type 3 "
283 handle_get_filename (r->fh)));
286 if (!read_documents (r, *dict))
303 assertive_buf_read (r, &data, sizeof data, 0);
304 if (r->reverse_endian)
306 bswap_int32 (&data.subtype);
307 bswap_int32 (&data.size);
308 bswap_int32 (&data.count);
310 bytes = data.size * data.count;
311 if (bytes < data.size || bytes < data.count)
312 lose ((ME, "%s: Record type %d subtype %d too large.",
313 handle_get_filename (r->fh), rec_type, data.subtype));
315 switch (data.subtype)
318 if (!read_machine_int32_info (r, data.size, data.count))
323 if (!read_machine_flt64_info (r, data.size, data.count))
328 case 6: /* ?? Used by SPSS 8.0. */
332 case 11: /* Variable display parameters */
334 const int n_vars = data.count / 3 ;
336 if ( data.count % 3 )
338 msg (MW, _("%s: Invalid subrecord length. "
339 "Record: 7; Subrecord: 11"),
340 handle_get_filename (r->fh));
344 for ( i = 0 ; i < n_vars ; ++i )
356 assertive_buf_read (r, ¶ms, sizeof(params), 0);
358 v = dict_get_var(*dict, i);
360 v->measure = params.measure;
361 v->display_width = params.width;
362 v->alignment = params.align;
367 case 13: /* SPSS 12.0 Long variable name map */
369 char *buf, *short_name, *save_ptr;
373 buf = xmalloc (bytes + 1);
374 if (!buf_read (r, buf, bytes, 0))
382 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
384 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
386 char *long_name = strtok_r (NULL, "\t", &save_ptr);
389 /* Validate long name. */
390 if (long_name == NULL)
392 msg (MW, _("%s: Trailing garbage in long variable "
394 handle_get_filename (r->fh));
397 if (!var_is_valid_name (long_name, false))
399 msg (MW, _("%s: Long variable mapping to invalid "
400 "variable name `%s'."),
401 handle_get_filename (r->fh), long_name);
405 /* Find variable using short name. */
406 v = dict_lookup_var (*dict, short_name);
409 msg (MW, _("%s: Long variable mapping for "
410 "nonexistent variable %s."),
411 handle_get_filename (r->fh), short_name);
416 Renaming a variable may clear the short
417 name, but we want to retain it, so
418 re-set it explicitly. */
419 dict_rename_var (*dict, v, long_name);
420 var_set_short_name (v, short_name);
422 /* For compatibility, make sure dictionary
423 is in long variable name map order. In
424 the common case, this has no effect,
425 because the dictionary and the long
426 variable name map are already in the
428 dict_reorder_var (*dict, v, idx);
437 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
438 "encountered in system file."),
439 handle_get_filename (r->fh), data.subtype);
445 void *x = buf_read (r, NULL, data.size * data.count, 0);
457 assertive_buf_read (r, &filler, sizeof filler, 0);
462 lose ((ME, _("%s: Unrecognized record type %d."),
463 handle_get_filename (r->fh), rec_type));
468 /* Come here on successful completion. */
473 /* Come here on unsuccessful completion. */
474 sfm_close_reader (r);
478 dict_destroy (*dict);
484 /* Read record type 7, subtype 3. */
486 read_machine_int32_info (struct sfm_reader *r, int size, int count)
493 if (size != sizeof (int32) || count != 8)
494 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
495 "subtype 3. Expected size %d, count 8."),
496 handle_get_filename (r->fh), size, count, sizeof (int32)));
498 assertive_buf_read (r, data, sizeof data, 0);
499 if (r->reverse_endian)
500 for (i = 0; i < 8; i++)
501 bswap_int32 (&data[i]);
505 lose ((ME, _("%s: Floating-point representation in system file is not "
506 "IEEE-754. PSPP cannot convert between floating-point "
508 handle_get_filename (r->fh)));
510 #error Add support for your floating-point format.
513 #ifdef WORDS_BIGENDIAN
518 if (r->reverse_endian)
520 if (file_bigendian ^ (data[6] == 1))
521 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
522 "endianness intuited from file header (%s)."),
523 handle_get_filename (r->fh),
524 file_bigendian ? _("big-endian") : _("little-endian"),
525 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
528 /* PORTME: Character representation code. */
529 if (data[7] != 2 && data[7] != 3)
530 lose ((ME, _("%s: File-indicated character representation code (%s) is "
532 handle_get_filename (r->fh),
533 (data[7] == 1 ? "EBCDIC"
534 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
542 /* Read record type 7, subtype 4. */
544 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
549 if (size != sizeof (flt64) || count != 3)
550 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
551 "subtype 4. Expected size %d, count 8."),
552 handle_get_filename (r->fh), size, count, sizeof (flt64)));
554 assertive_buf_read (r, data, sizeof data, 0);
555 if (r->reverse_endian)
556 for (i = 0; i < 3; i++)
557 bswap_flt64 (&data[i]);
559 if (data[0] != SYSMIS || data[1] != FLT64_MAX
560 || data[2] != second_lowest_flt64)
563 r->highest = data[1];
565 msg (MW, _("%s: File-indicated value is different from internal value "
566 "for at least one of the three system values. SYSMIS: "
567 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
569 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
570 (double) data[1], (double) FLT64_MAX,
571 (double) data[2], (double) second_lowest_flt64);
581 read_header (struct sfm_reader *r,
582 struct dictionary *dict, struct sfm_read_info *info)
584 struct sysfile_header hdr; /* Disk buffer. */
585 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
586 int skip_amt = 0; /* Amount of product name to omit. */
589 /* Read header, check magic. */
590 assertive_buf_read (r, &hdr, sizeof hdr, 0);
591 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
592 lose ((ME, _("%s: Bad magic. Proper system files begin with "
593 "the four characters `$FL2'. This file will not be read."),
594 handle_get_filename (r->fh)));
596 /* Check eye-catcher string. */
597 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
598 for (i = 0; i < 60; i++)
599 if (!isprint ((unsigned char) prod_name[i]))
601 for (i = 59; i >= 0; i--)
602 if (!isgraph ((unsigned char) prod_name[i]))
607 prod_name[60] = '\0';
611 static const char *prefix[N_PREFIXES] =
613 "@(#) SPSS DATA FILE",
619 for (i = 0; i < N_PREFIXES; i++)
620 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
622 skip_amt = strlen (prefix[i]);
627 /* Check endianness. */
628 if (hdr.layout_code == 2)
629 r->reverse_endian = 0;
632 bswap_int32 (&hdr.layout_code);
633 if (hdr.layout_code != 2)
634 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
635 "should be 2, in big-endian or little-endian format."),
636 handle_get_filename (r->fh), hdr.layout_code));
638 r->reverse_endian = 1;
639 bswap_int32 (&hdr.case_size);
640 bswap_int32 (&hdr.compress);
641 bswap_int32 (&hdr.weight_idx);
642 bswap_int32 (&hdr.case_cnt);
643 bswap_flt64 (&hdr.bias);
646 /* Copy basic info and verify correctness. */
647 r->value_cnt = hdr.case_size;
648 if (r->value_cnt <= 0
649 || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
650 lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
652 handle_get_filename (r->fh), r->value_cnt,
653 INT_MAX / sizeof (union value) / 2));
655 r->compressed = hdr.compress;
657 r->weight_idx = hdr.weight_idx - 1;
658 if (hdr.weight_idx < 0 || hdr.weight_idx > r->value_cnt)
659 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
660 "and number of elements per case (%d)."),
661 handle_get_filename (r->fh), hdr.weight_idx, r->value_cnt));
663 r->case_cnt = hdr.case_cnt;
664 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
666 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
667 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
670 if (r->bias != 100.0)
671 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
673 handle_get_filename (r->fh), r->bias);
675 /* Make a file label only on the condition that the given label is
676 not all spaces or nulls. */
680 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
681 if (!isspace ((unsigned char) hdr.file_label[i])
682 && hdr.file_label[i] != 0)
684 char *label = xmalloc (i + 2);
685 memcpy (label, hdr.file_label, i + 1);
687 dict_set_label (dict, label);
697 memcpy (info->creation_date, hdr.creation_date, 9);
698 info->creation_date[9] = 0;
700 memcpy (info->creation_time, hdr.creation_time, 8);
701 info->creation_time[8] = 0;
703 #ifdef WORDS_BIGENDIAN
704 info->big_endian = !r->reverse_endian;
706 info->big_endian = r->reverse_endian;
709 info->compressed = hdr.compress;
711 info->case_cnt = hdr.case_cnt;
713 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
714 if (isgraph ((unsigned char) *cp))
716 strcpy (info->product, cp);
725 /* Reads most of the dictionary from R's file; also fills in the
726 associated VAR_BY_IDX array. */
728 read_variables (struct sfm_reader *r,
729 struct dictionary *dict, struct variable ***var_by_idx)
733 struct sysfile_variable sv; /* Disk buffer. */
734 int long_string_count = 0; /* # of long string continuation
735 records still expected. */
736 int next_value = 0; /* Index to next `value' structure. */
741 /* Allocate variables. */
742 *var_by_idx = xmalloc (sizeof **var_by_idx * r->value_cnt);
744 /* Read in the entry for each variable and use the info to
745 initialize the dictionary. */
746 for (i = 0; i < r->value_cnt; i++)
753 assertive_buf_read (r, &sv, sizeof sv, 0);
755 if (r->reverse_endian)
757 bswap_int32 (&sv.rec_type);
758 bswap_int32 (&sv.type);
759 bswap_int32 (&sv.has_var_label);
760 bswap_int32 (&sv.n_missing_values);
761 bswap_int32 (&sv.print);
762 bswap_int32 (&sv.write);
765 if (sv.rec_type != 2)
766 lose ((ME, _("%s: position %d: Bad record type (%d); "
767 "the expected value was 2."),
768 handle_get_filename (r->fh), i, sv.rec_type));
770 /* If there was a long string previously, make sure that the
771 continuations are present; otherwise make sure there aren't
773 if (long_string_count)
776 lose ((ME, _("%s: position %d: String variable does not have "
777 "proper number of continuation records."),
778 handle_get_filename (r->fh), i));
780 (*var_by_idx)[i] = NULL;
784 else if (sv.type == -1)
785 lose ((ME, _("%s: position %d: Superfluous long string continuation "
787 handle_get_filename (r->fh), i));
789 /* Check fields for validity. */
790 if (sv.type < 0 || sv.type > 255)
791 lose ((ME, _("%s: position %d: Bad variable type code %d."),
792 handle_get_filename (r->fh), i, sv.type));
793 if (sv.has_var_label != 0 && sv.has_var_label != 1)
794 lose ((ME, _("%s: position %d: Variable label indicator field is not "
795 "0 or 1."), handle_get_filename (r->fh), i));
796 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
797 || sv.n_missing_values == -1)
798 lose ((ME, _("%s: position %d: Missing value indicator field is not "
799 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
801 /* Copy first character of variable name. */
802 if (!isalpha ((unsigned char) sv.name[0])
803 && sv.name[0] != '@' && sv.name[0] != '#')
804 lose ((ME, _("%s: position %d: Variable name begins with invalid "
806 handle_get_filename (r->fh), i));
807 if (islower ((unsigned char) sv.name[0]))
808 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
810 handle_get_filename (r->fh), i, sv.name[0]);
811 if (sv.name[0] == '#')
812 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
813 "(`#'). Scratch variables should not appear in system "
815 handle_get_filename (r->fh), i);
816 name[0] = toupper ((unsigned char) (sv.name[0]));
818 /* Copy remaining characters of variable name. */
819 for (j = 1; j < SHORT_NAME_LEN; j++)
821 int c = (unsigned char) sv.name[j];
825 else if (islower (c))
827 msg (MW, _("%s: position %d: Variable name character %d is "
828 "lowercase letter %c."),
829 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
830 name[j] = toupper ((unsigned char) (c));
832 else if (isalnum (c) || c == '.' || c == '@'
833 || c == '#' || c == '$' || c == '_')
836 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
838 handle_get_filename (r->fh), i, c, c));
842 /* Create variable. */
843 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
845 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
846 handle_get_filename (r->fh), name));
847 var_set_short_name (vv, vv->name);
849 /* Case reading data. */
850 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
851 long_string_count = nv - 1;
854 /* Get variable label, if any. */
855 if (sv.has_var_label == 1)
860 /* Read length of label. */
861 assertive_buf_read (r, &len, sizeof len, 0);
862 if (r->reverse_endian)
866 if (len < 0 || len > 255)
867 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
869 handle_get_filename (r->fh), vv->name, len));
873 /* Read label into variable structure. */
874 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
875 if (vv->label == NULL)
877 vv->label[len] = '\0';
881 /* Set missing values. */
882 if (sv.n_missing_values != 0)
886 if (vv->width > MAX_SHORT_STRING)
887 lose ((ME, _("%s: Long string variable %s may not have missing "
889 handle_get_filename (r->fh), vv->name));
891 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
893 if (r->reverse_endian && vv->type == NUMERIC)
894 for (j = 0; j < abs (sv.n_missing_values); j++)
895 bswap_flt64 (&mv[j]);
897 if (sv.n_missing_values > 0)
899 vv->miss_type = sv.n_missing_values;
900 if (vv->type == NUMERIC)
901 for (j = 0; j < sv.n_missing_values; j++)
902 vv->missing[j].f = mv[j];
904 for (j = 0; j < sv.n_missing_values; j++)
905 memcpy (vv->missing[j].s, &mv[j], vv->width);
911 if (vv->type == ALPHA)
912 lose ((ME, _("%s: String variable %s may not have missing "
913 "values specified as a range."),
914 handle_get_filename (r->fh), vv->name));
916 if (mv[0] == r->lowest)
918 vv->miss_type = MISSING_LOW;
919 vv->missing[x++].f = mv[1];
921 else if (mv[1] == r->highest)
923 vv->miss_type = MISSING_HIGH;
924 vv->missing[x++].f = mv[0];
928 vv->miss_type = MISSING_RANGE;
929 vv->missing[x++].f = mv[0];
930 vv->missing[x++].f = mv[1];
933 if (sv.n_missing_values == -3)
936 vv->missing[x++].f = mv[2];
941 vv->miss_type = MISSING_NONE;
943 if (!parse_format_spec (r, sv.print, &vv->print, vv)
944 || !parse_format_spec (r, sv.write, &vv->write, vv))
947 /* Add variable to list. */
948 if (var_cap >= r->var_cnt)
950 var_cap = 2 + r->var_cnt * 2;
951 r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars);
953 r->vars[r->var_cnt].width = vv->width;
954 r->vars[r->var_cnt].fv = vv->fv;
958 /* Some consistency checks. */
959 if (long_string_count != 0)
960 lose ((ME, _("%s: Long string continuation records omitted at end of "
962 handle_get_filename (r->fh)));
963 if (next_value != r->value_cnt)
964 lose ((ME, _("%s: System file header indicates %d variable positions but "
965 "%d were read from file."),
966 handle_get_filename (r->fh), r->value_cnt, next_value));
974 /* Translates the format spec from sysfile format to internal
977 parse_format_spec (struct sfm_reader *r, int32 s,
978 struct fmt_spec *f, struct variable *v)
980 f->type = translate_fmt ((s >> 16) & 0xff);
982 lose ((ME, _("%s: Bad format specifier byte (%d)."),
983 handle_get_filename (r->fh), (s >> 16) & 0xff));
984 f->w = (s >> 8) & 0xff;
987 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
988 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
989 handle_get_filename (r->fh),
990 v->type == ALPHA ? _("String") : _("Numeric"),
992 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
993 formats[f->type].name));
995 if (!check_output_specifier (f, false)
996 || !check_specifier_width (f, v->width, false))
998 msg (ME, _("%s variable %s has invalid format specifier %s."),
999 v->type == NUMERIC ? _("Numeric") : _("String"),
1000 v->name, fmt_to_string (f));
1001 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1009 /* Reads value labels from R's system file and inserts them into the
1010 associated dictionary. */
1012 read_value_labels (struct sfm_reader *r,
1013 struct dictionary *dict, struct variable **var_by_idx)
1017 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
1018 union value value; /* Value. */
1019 char *label; /* Null-terminated label string. */
1022 struct label *labels = NULL;
1023 int32 n_labels; /* Number of labels. */
1025 struct variable **var = NULL; /* Associated variables. */
1026 int32 n_vars; /* Number of associated variables. */
1030 /* First step: read the contents of the type 3 record and record its
1031 contents. Note that we can't do much with the data since we
1032 don't know yet whether it is of numeric or string type. */
1034 /* Read number of labels. */
1035 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1036 if (r->reverse_endian)
1037 bswap_int32 (&n_labels);
1039 /* Allocate memory. */
1040 labels = xmalloc (n_labels * sizeof *labels);
1041 for (i = 0; i < n_labels; i++)
1042 labels[i].label = NULL;
1044 /* Read each value/label tuple into labels[]. */
1045 for (i = 0; i < n_labels; i++)
1047 struct label *label = labels + i;
1048 unsigned char label_len;
1052 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1054 /* Read label length. */
1055 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1056 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1058 /* Read label, padding. */
1059 label->label = xmalloc (padded_len + 1);
1060 assertive_buf_read (r, label->label, padded_len - 1, 0);
1061 label->label[label_len] = 0;
1064 /* Second step: Read the type 4 record that has the list of
1065 variables to which the value labels are to be applied. */
1067 /* Read record type of type 4 record. */
1071 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1072 if (r->reverse_endian)
1073 bswap_int32 (&rec_type);
1076 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1077 "follow value label record (type 3) as it should."),
1078 handle_get_filename (r->fh)));
1081 /* Read number of variables associated with value label from type 4
1083 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1084 if (r->reverse_endian)
1085 bswap_int32 (&n_vars);
1086 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1087 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1088 "is not between 1 and the number of variables (%d)."),
1089 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1091 /* Read the list of variables. */
1092 var = xmalloc (n_vars * sizeof *var);
1093 for (i = 0; i < n_vars; i++)
1098 /* Read variable index, check range. */
1099 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1100 if (r->reverse_endian)
1101 bswap_int32 (&var_idx);
1102 if (var_idx < 1 || var_idx > r->value_cnt)
1103 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1104 "not between 1 and the number of values (%d)."),
1105 handle_get_filename (r->fh), var_idx, r->value_cnt));
1107 /* Make sure it's a real variable. */
1108 v = var_by_idx[var_idx - 1];
1110 lose ((ME, _("%s: Variable index associated with value label (%d) "
1111 "refers to a continuation of a string variable, not to "
1112 "an actual variable."),
1113 handle_get_filename (r->fh), var_idx));
1114 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1115 lose ((ME, _("%s: Value labels are not allowed on long string "
1117 handle_get_filename (r->fh), v->name));
1119 /* Add it to the list of variables. */
1123 /* Type check the variables. */
1124 for (i = 1; i < n_vars; i++)
1125 if (var[i]->type != var[0]->type)
1126 lose ((ME, _("%s: Variables associated with value label are not all of "
1127 "identical type. Variable %s has %s type, but variable "
1129 handle_get_filename (r->fh),
1130 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1131 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1133 /* Fill in labels[].value, now that we know the desired type. */
1134 for (i = 0; i < n_labels; i++)
1136 struct label *label = labels + i;
1138 if (var[0]->type == ALPHA)
1140 const int copy_len = min (sizeof (label->raw_value),
1141 sizeof (label->label));
1142 memcpy (label->value.s, label->raw_value, copy_len);
1145 assert (sizeof f == sizeof label->raw_value);
1146 memcpy (&f, label->raw_value, sizeof f);
1147 if (r->reverse_endian)
1153 /* Assign the value_label's to each variable. */
1154 for (i = 0; i < n_vars; i++)
1156 struct variable *v = var[i];
1159 /* Add each label to the variable. */
1160 for (j = 0; j < n_labels; j++)
1162 struct label *label = labels + j;
1163 if (!val_labs_replace (v->val_labs, label->value, label->label))
1166 if (var[0]->type == NUMERIC)
1167 msg (MW, _("%s: File contains duplicate label for value %g for "
1169 handle_get_filename (r->fh), label->value.f, v->name);
1171 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1172 "for variable %s."),
1173 handle_get_filename (r->fh), v->width, label->value.s, v->name);
1177 for (i = 0; i < n_labels; i++)
1178 free (labels[i].label);
1186 for (i = 0; i < n_labels; i++)
1187 free (labels[i].label);
1194 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1195 non-NULL, uses that as the buffer; otherwise allocates at least
1196 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1199 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1203 if (buf == NULL && byte_cnt > 0 )
1204 buf = xmalloc (max (byte_cnt, min_alloc));
1206 if ( byte_cnt == 0 )
1210 if (1 != fread (buf, byte_cnt, 1, r->file))
1212 if (ferror (r->file))
1213 msg (ME, _("%s: Reading system file: %s."),
1214 handle_get_filename (r->fh), strerror (errno));
1216 corrupt_msg (ME, _("%s: Unexpected end of file."),
1217 handle_get_filename (r->fh));
1223 /* Reads a document record, type 6, from system file R, and sets up
1224 the documents and n_documents fields in the associated
1227 read_documents (struct sfm_reader *r, struct dictionary *dict)
1232 if (dict_get_documents (dict) != NULL)
1233 lose ((ME, _("%s: System file contains multiple "
1234 "type 6 (document) records."),
1235 handle_get_filename (r->fh)));
1237 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1239 lose ((ME, _("%s: Number of document lines (%ld) "
1240 "must be greater than 0."),
1241 handle_get_filename (r->fh), (long) line_cnt));
1243 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1244 /* FIXME? Run through asciify. */
1245 if (documents == NULL)
1247 documents[80 * line_cnt] = '\0';
1248 dict_set_documents (dict, documents);
1258 /* Reads compressed data into H->BUF and sets other pointers
1259 appropriately. Returns nonzero only if both no errors occur and
1262 buffer_input (struct sfm_reader *r)
1267 r->buf = xmalloc (sizeof *r->buf * 128);
1268 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1269 if (ferror (r->file))
1271 msg (ME, _("%s: Error reading file: %s."),
1272 handle_get_filename (r->fh), strerror (errno));
1276 r->end = &r->buf[amt];
1280 /* Reads a single case consisting of compressed data from the system
1281 file open in reader R into the array BUF[], and
1282 returns nonzero only if successful. */
1283 /* Data in system files is compressed in this manner. Data
1284 values are grouped into sets of eight ("octets"). Each value
1285 in an octet has one instruction byte that are output together.
1286 Each instruction byte gives a value for that byte or indicates
1287 that the value can be found following the instructions. */
1289 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1291 const unsigned char *p_end = r->x + sizeof (flt64);
1292 unsigned char *p = r->y;
1294 const flt64 *buf_beg = buf;
1295 const flt64 *buf_end = &buf[r->value_cnt];
1299 for (; p < p_end; p++)
1303 /* Code 0 is ignored. */
1306 /* Code 252 is end of file. */
1308 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1309 "in partial case."),
1310 handle_get_filename (r->fh)));
1313 /* Code 253 indicates that the value is stored explicitly
1314 following the instruction bytes. */
1315 if (r->ptr == NULL || r->ptr >= r->end)
1316 if (!buffer_input (r))
1318 lose ((ME, _("%s: Unexpected end of file."),
1319 handle_get_filename (r->fh)));
1322 memcpy (buf++, r->ptr++, sizeof *buf);
1327 /* Code 254 indicates a string that is all blanks. */
1328 memset (buf++, ' ', sizeof *buf);
1333 /* Code 255 indicates the system-missing value. */
1335 if (r->reverse_endian)
1342 /* Codes 1 through 251 inclusive are taken to indicate a
1343 value of (BYTE - BIAS), where BYTE is the byte's value
1344 and BIAS is the compression bias (generally 100.0). */
1345 *buf = *p - r->bias;
1346 if (r->reverse_endian)
1354 /* We have reached the end of this instruction octet. Read
1356 if (r->ptr == NULL || r->ptr >= r->end)
1357 if (!buffer_input (r))
1360 lose ((ME, _("%s: Unexpected end of file."),
1361 handle_get_filename (r->fh)));
1364 memcpy (r->x, r->ptr++, sizeof *buf);
1372 /* We have filled up an entire record. Update state and return
1378 /* We have been unsuccessful at filling a record, either through i/o
1379 error or through an end-of-file indication. Update state and
1380 return unsuccessfully. */
1384 /* Reads one case from READER's file into C. Returns nonzero
1385 only if successful. */
1387 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1389 if (!r->compressed && sizeof (flt64) == sizeof (double))
1391 /* Fast path: external and internal representations are the
1392 same, except possibly for endianness or SYSMIS. Read
1393 directly into the case's buffer, then fix up any minor
1394 details as needed. */
1395 if (!fread_ok (r, case_data_all_rw (c),
1396 sizeof (union value) * r->value_cnt))
1399 /* Fix up endianness if needed. */
1400 if (r->reverse_endian)
1404 for (i = 0; i < r->var_cnt; i++)
1405 if (r->vars[i].width == 0)
1406 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1409 /* Fix up SYSMIS values if needed.
1410 I don't think this will ever actually kick in, but it
1412 if (r->sysmis != SYSMIS)
1416 for (i = 0; i < r->var_cnt; i++)
1417 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1418 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1423 /* Slow path: internal and external representations differ.
1424 Read into a bounce buffer, then copy to C. */
1431 bounce_size = sizeof *bounce * r->value_cnt;
1432 bounce = bounce_cur = local_alloc (bounce_size);
1435 read_ok = fread_ok (r, bounce, bounce_size);
1437 read_ok = read_compressed_data (r, bounce);
1440 local_free (bounce);
1444 for (i = 0; i < r->var_cnt; i++)
1446 struct sfm_var *v = &r->vars[i];
1450 flt64 f = *bounce_cur++;
1451 if (r->reverse_endian)
1453 case_data_rw (c, i)->f = f == r->sysmis ? SYSMIS : f;
1457 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1458 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1462 local_free (bounce);
1468 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1470 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1472 if (read_bytes == byte_cnt)
1476 if (ferror (r->file))
1477 msg (ME, _("%s: Reading system file: %s."),
1478 handle_get_filename (r->fh), strerror (errno));
1479 else if (read_bytes != 0)
1480 msg (ME, _("%s: Partial record at end of system file."),
1481 handle_get_filename (r->fh));