1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
44 #include "debug-print.h"
46 /* System file reader. */
/* State kept for one open system file: the handle and stream, layout
   flags taken from the file header, per-position variable info, and the
   buffers used while decompressing case data.
   NOTE: the struct opening line, its closing brace and some members
   (for example the sysmis, highest and lowest constants assigned in
   sfm_open_reader) are not visible in this excerpt. */
49 struct file_handle *fh; /* File handle. */
50 FILE *file; /* File stream. */
52 int reverse_endian; /* 1=file has endianness opposite us. */
53 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
54 int value_cnt; /* Number of `union values's per case. */
55 long case_cnt; /* Number of cases, -1 if unknown. */
56 int compressed; /* 1=compressed, 0=not compressed. */
57 double bias; /* Compression bias, usually 100.0. */
58 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 struct sfm_var *vars; /* Variables. */
63 /* File's special constants. */
68 /* Decompression buffer. */
69 flt64 *buf; /* Buffer data. */
70 flt64 *ptr; /* Current location in buffer. */
71 flt64 *end; /* End of buffer data. */
73 /* Compression instruction octet. */
74 unsigned char x[8]; /* Current instruction octet. */
75 unsigned char *y; /* Location in current instruction octet. */
78 /* A variable in a system file. */
/* The reader keeps one of these per value position in a case.  A width
   of -1 marks a position that continues a long string: read_variables
   stores -1 there and sfm_read_case skips such entries. */
81 int width; /* 0=numeric, otherwise string width. */
82 int fv; /* Index into case. */
87 /* Swap bytes *A and *B. */
/* Building block for the multi-byte swaps that follow; both arguments
   point at single bytes.  (The body is not visible in this excerpt.) */
89 bswap (unsigned char *a, unsigned char *b)
96 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
/* Works in place on the object X_ points to by viewing it as four raw
   bytes and exchanging byte 0 with 3 and byte 1 with 2. */
98 bswap_int32 (int32 *x_)
100 unsigned char *x = (unsigned char *) x_;
101 bswap (x + 0, x + 3);
102 bswap (x + 1, x + 2);
105 /* Reverse the byte order of 64-bit floating point *X. */
/* Works in place on the object X_ points to by viewing it as eight raw
   bytes and exchanging the mirrored pairs 0/7, 1/6, 2/5 and 3/4. */
107 bswap_flt64 (flt64 *x_)
109 unsigned char *x = (unsigned char *) x_;
110 bswap (x + 0, x + 7);
111 bswap (x + 1, x + 6);
112 bswap (x + 2, x + 5);
113 bswap (x + 3, x + 4);
/* Prototype for corrupt_msg.  PRINTF_FORMAT (2, 3) presumably marks
   FORMAT (argument 2) as a printf-style format whose values start at
   argument 3 -- the macro is defined outside this file; confirm. */
117 corrupt_msg (int class, const char *format,...)
118 PRINTF_FORMAT (2, 3);
120 /* Displays a corrupt sysfile error. */
/* CLASS is the message class (callers in this file pass ME or MW).
   The visible statements take the current location from getl_location,
   set the title prefix, and hand FORMAT plus the variadic arguments to
   err_vmsg.  (Declarations and va_end are on lines not shown here.) */
122 corrupt_msg (int class, const char *format,...)
128 getl_location (&e.where.filename, &e.where.line_number);
129 e.title = _("corrupt system file: ");
131 va_start (args, format);
132 err_vmsg (&e, format, args);
136 /* Closes a system file after we're done with it. */
/* Visible steps: release the file handle with fh_close, then close the
   stream with fn_close and report a message built from strerror (errno)
   if that returns EOF.  Any guards and the freeing of reader memory are
   on lines not shown in this excerpt. */
138 sfm_close_reader (struct sfm_reader *r)
144 fh_close (r->fh, "system file", "rs");
147 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
148 msg (ME, _("%s: Closing system file: %s."),
149 handle_get_filename (r->fh), strerror (errno));
157 /* Dictionary reader. */
/* Forward declarations of the static helpers used while reading the
   dictionary.  Callers in this file test every int-returning reader
   (and the pointer returned by buf_read) with the ! operator, so a zero
   or null result means failure.
   NOTE: the buf_read prototype continues on a line that is not visible
   in this excerpt. */
159 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
161 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
164 static int read_header (struct sfm_reader *,
165 struct dictionary *, struct sfm_read_info *);
166 static int parse_format_spec (struct sfm_reader *, int32,
167 struct fmt_spec *, struct variable *);
168 static int read_value_labels (struct sfm_reader *, struct dictionary *,
169 struct variable **var_by_idx);
170 static int read_variables (struct sfm_reader *,
171 struct dictionary *, struct variable ***var_by_idx);
172 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
173 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
174 static int read_documents (struct sfm_reader *, struct dictionary *);
176 static int fread_ok (struct sfm_reader *, void *, size_t);
178 /* Displays the message X with corrupt_msg, then jumps to the error
186 /* Calls buf_read with the specified arguments, and jumps to
187 error if the read fails. */
/* NOTE: the definition of the lose macro that the first comment
   describes, and the remainder of assertive_buf_read, are on lines not
   visible in this excerpt.  Both macros expect the calling function to
   provide a local label named error. */
188 #define assertive_buf_read(a,b,c,d) \
190 if (!buf_read (a,b,c,d)) \
/* Overview of sfm_open_reader, as far as this excerpt shows: create a
   dictionary, open the handle and stream, read the header and the
   variable records, resolve the weighting variable, then process the
   remaining dictionary records (types 3, 4, 6 and 7).  The failure path
   at the end closes the reader and destroys the dictionary.  Return
   statements, labels and braces are on lines not shown here. */
194 /* Opens the system file designated by file handle FH for
195 reading. Reads the system file's dictionary into *DICT.
196 If INFO is non-null, then it receives additional info about the
199 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
200 struct sfm_read_info *info)
202 struct sfm_reader *r = NULL;
203 struct variable **var_by_idx = NULL;
205 *dict = dict_create ();
206 if (!fh_open (fh, "system file", "rs"))
209 /* Create and initialize reader. */
210 r = xmalloc (sizeof *r);
212 r->file = fn_open (handle_get_filename (fh), "rb");
214 r->reverse_endian = 0;
224 r->sysmis = -FLT64_MAX;
225 r->highest = FLT64_MAX;
226 r->lowest = second_lowest_flt64;
228 r->buf = r->ptr = r->end = NULL;
/* Point y one past the end of x, i.e. mark the instruction octet as
   used up, so that read_compressed_data fetches a fresh octet first. */
229 r->y = r->x + sizeof r->x;
231 /* Check that file open succeeded. */
234 msg (ME, _("An error occurred while opening \"%s\" for reading "
235 "as a system file: %s."),
236 handle_get_filename (r->fh), strerror (errno));
241 /* Read header and variables. */
242 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
246 /* Handle weighting. */
/* read_header stores the header weight index minus one, so -1 here
   means the header value was 0 (no weighting variable). */
247 if (r->weight_idx != -1)
249 struct variable *weight_var;
251 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
252 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
253 "and number of elements per case (%d)."),
254 handle_get_filename (r->fh), r->weight_idx, r->value_cnt));
257 weight_var = var_by_idx[r->weight_idx];
259 if (weight_var == NULL)
261 _("%s: Weighting variable may not be a continuation of "
262 "a long string variable."), handle_get_filename (fh)));
263 else if (weight_var->type == ALPHA)
264 lose ((ME, _("%s: Weighting variable may not be a string variable."),
265 handle_get_filename (fh)));
267 dict_set_weight (*dict, weight_var);
270 dict_set_weight (*dict, NULL);
272 /* Read records of types 3, 4, 6, and 7. */
277 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
278 if (r->reverse_endian)
279 bswap_int32 (&rec_type);
284 if (!read_value_labels (r, *dict, var_by_idx))
289 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
290 "records must always immediately follow type 3 "
292 handle_get_filename (r->fh)));
295 if (!read_documents (r, *dict))
312 assertive_buf_read (r, &data, sizeof data, 0);
313 if (r->reverse_endian)
315 bswap_int32 (&data.subtype);
316 bswap_int32 (&data.size);
317 bswap_int32 (&data.count);
/* NOTE(review): size and count are int32 values read from the file.
   Testing the product after the multiplication does not reliably detect
   overflow, and overflow of signed operands is undefined behavior;
   consider validating the operands before multiplying.  The type of
   bytes is declared on a line not shown here -- confirm. */
319 bytes = data.size * data.count;
320 if (bytes < data.size || bytes < data.count)
321 lose ((ME, "%s: Record type %d subtype %d too large.",
322 handle_get_filename (r->fh), rec_type, data.subtype));
324 switch (data.subtype)
327 if (!read_machine_int32_info (r, data.size, data.count))
332 if (!read_machine_flt64_info (r, data.size, data.count))
337 case 6: /* ?? Used by SPSS 8.0. */
341 case 11: /* Variable display parameters */
343 const int n_vars = data.count / 3 ;
345 if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
347 msg (MW, _("%s: Invalid subrecord length. "
348 "Record: 7; Subrecord: 11"),
349 handle_get_filename (r->fh));
353 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
/* NOTE(review): the second argument on the next line looks like a
   mis-encoded address-of expression on params (an ampersand followed by
   the name params); confirm against the upstream file. */
365 assertive_buf_read (r, ¶ms, sizeof(params), 0);
367 v = dict_get_var(*dict, i);
369 v->measure = params.measure;
370 v->display_width = params.width;
371 v->alignment = params.align;
376 case 13: /* SPSS 12.0 Long variable name map */
378 char *buf, *short_name, *save_ptr;
382 buf = xmalloc (bytes + 1);
383 if (!buf_read (r, buf, bytes, 0))
/* The record is tokenized as entries of the form SHORT=LONG that are
   separated by tab characters. */
391 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
393 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
395 char *long_name = strtok_r (NULL, "\t", &save_ptr);
398 /* Validate long name. */
399 if (long_name == NULL)
401 msg (MW, _("%s: Trailing garbage in long variable "
403 handle_get_filename (r->fh));
406 if (!var_is_valid_name (long_name, false))
408 msg (MW, _("%s: Long variable mapping to invalid "
409 "variable name `%s'."),
410 handle_get_filename (r->fh), long_name);
414 /* Find variable using short name. */
415 v = dict_lookup_var (*dict, short_name);
418 msg (MW, _("%s: Long variable mapping for "
419 "nonexistent variable %s."),
420 handle_get_filename (r->fh), short_name);
424 /* Identify any duplicates. */
425 if ( compare_var_names(short_name, long_name, 0) &&
426 NULL != dict_lookup_var (*dict, long_name))
428 lose ((ME, _("%s: Duplicate long variable name `%s' "
429 "within system file."),
430 handle_get_filename (r->fh), long_name));
435 Renaming a variable may clear the short
436 name, but we want to retain it, so
437 re-set it explicitly. */
438 dict_rename_var (*dict, v, long_name);
439 var_set_short_name (v, short_name);
441 /* For compatibility, make sure dictionary
442 is in long variable name map order. In
443 the common case, this has no effect,
444 because the dictionary and the long
445 variable name map are already in the
447 dict_reorder_var (*dict, v, idx);
456 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
457 "encountered in system file."),
458 handle_get_filename (r->fh), data.subtype);
464 void *x = buf_read (r, NULL, data.size * data.count, 0);
476 assertive_buf_read (r, &filler, sizeof filler, 0);
481 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
482 handle_get_filename (r->fh), rec_type);
487 /* Come here on successful completion. */
492 /* Come here on unsuccessful completion. */
493 sfm_close_reader (r);
497 dict_destroy (*dict);
503 /* Read record type 7, subtype 3. */
/* SIZE and COUNT come from the record header and must describe eight
   int32 values.  After optional byte swapping, the visible checks use
   element 6 as the endianness indicator (1 is reported as big-endian,
   2 as little-endian) and element 7 as the character code (only 2 and
   3 are accepted; 1 is reported as EBCDIC and 4 as DEC Kanji).
   Callers treat a zero return as failure; the return statements are on
   lines not shown in this excerpt. */
505 read_machine_int32_info (struct sfm_reader *r, int size, int count)
/* NOTE(review): the last %d in the message below is given
   sizeof (int32), which has type size_t, not int; the conversion
   specifier and argument type do not match. */
512 if (size != sizeof (int32) || count != 8)
513 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
514 "subtype 3. Expected size %d, count 8."),
515 handle_get_filename (r->fh), size, count, sizeof (int32)));
517 assertive_buf_read (r, data, sizeof data, 0);
518 if (r->reverse_endian)
519 for (i = 0; i < 8; i++)
520 bswap_int32 (&data[i]);
524 lose ((ME, _("%s: Floating-point representation in system file is not "
525 "IEEE-754. PSPP cannot convert between floating-point "
527 handle_get_filename (r->fh)));
529 #error Add support for your floating-point format.
532 #ifdef WORDS_BIGENDIAN
537 if (r->reverse_endian)
/* Cross-check the endianness deduced from the header layout code
   against the value the file itself declares. */
539 if (file_bigendian ^ (data[6] == 1))
540 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
541 "endianness intuited from file header (%s)."),
542 handle_get_filename (r->fh),
543 file_bigendian ? _("big-endian") : _("little-endian"),
544 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
547 /* PORTME: Character representation code. */
548 if (data[7] != 2 && data[7] != 3)
549 lose ((ME, _("%s: File-indicated character representation code (%s) is "
551 handle_get_filename (r->fh),
552 (data[7] == 1 ? "EBCDIC"
553 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
561 /* Read record type 7, subtype 4. */
/* SIZE and COUNT come from the record header and must describe three
   flt64 values, which are compared below against SYSMIS, FLT64_MAX and
   second_lowest_flt64.  When any of them differs, the file values are
   recorded in the reader (only the assignment of highest is visible
   here) and a warning is issued.
   NOTE(review): the error text says the expected count is 8 although
   the check requires 3, and its last %d is given sizeof (flt64), which
   has type size_t rather than int. */
563 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
568 if (size != sizeof (flt64) || count != 3)
569 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
570 "subtype 4. Expected size %d, count 8."),
571 handle_get_filename (r->fh), size, count, sizeof (flt64)));
573 assertive_buf_read (r, data, sizeof data, 0);
574 if (r->reverse_endian)
575 for (i = 0; i < 3; i++)
576 bswap_flt64 (&data[i]);
578 if (data[0] != SYSMIS || data[1] != FLT64_MAX
579 || data[2] != second_lowest_flt64)
582 r->highest = data[1];
584 msg (MW, _("%s: File-indicated value is different from internal value "
585 "for at least one of the three system values. SYSMIS: "
586 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
588 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
589 (double) data[1], (double) FLT64_MAX,
590 (double) data[2], (double) second_lowest_flt64);
/* Reads the fixed-size file header into a local struct, checks the
   magic string, determines whether the file needs byte swapping from
   the layout code, copies the basic counts into the reader R, sets the
   dictionary file label, and, in the later part of the function, fills
   in INFO (creation date and time, endianness, compression flag, case
   count and product name).  Callers treat a zero return as failure;
   return statements and several guards are on lines not shown here. */
600 read_header (struct sfm_reader *r,
601 struct dictionary *dict, struct sfm_read_info *info)
603 struct sysfile_header hdr; /* Disk buffer. */
604 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
605 int skip_amt = 0; /* Amount of product name to omit. */
608 /* Read header, check magic. */
609 assertive_buf_read (r, &hdr, sizeof hdr, 0);
610 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
611 lose ((ME, _("%s: Bad magic. Proper system files begin with "
612 "the four characters `$FL2'. This file will not be read."),
613 handle_get_filename (r->fh)));
615 /* Check eye-catcher string. */
616 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
617 for (i = 0; i < 60; i++)
618 if (!isprint ((unsigned char) prod_name[i]))
620 for (i = 59; i >= 0; i--)
621 if (!isgraph ((unsigned char) prod_name[i]))
626 prod_name[60] = '\0';
630 static const char *prefix[N_PREFIXES] =
632 "@(#) SPSS DATA FILE",
/* Remember the length of a recognized boilerplate prefix so that it
   can be skipped when the product name is copied into INFO. */
638 for (i = 0; i < N_PREFIXES; i++)
639 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
641 skip_amt = strlen (prefix[i]);
646 /* Check endianness. */
/* A layout code of 2 read as-is means the file has our byte order; if
   it only equals 2 after swapping, every multi-byte header field is
   swapped and the reader is flagged as reverse-endian. */
647 if (hdr.layout_code == 2)
648 r->reverse_endian = 0;
651 bswap_int32 (&hdr.layout_code);
652 if (hdr.layout_code != 2)
653 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
654 "should be 2, in big-endian or little-endian format."),
655 handle_get_filename (r->fh), hdr.layout_code));
657 r->reverse_endian = 1;
658 bswap_int32 (&hdr.case_size);
659 bswap_int32 (&hdr.compress);
660 bswap_int32 (&hdr.weight_idx);
661 bswap_int32 (&hdr.case_cnt);
662 bswap_flt64 (&hdr.bias);
666 /* Copy basic info and verify correctness. */
667 r->value_cnt = hdr.case_size;
669 /* If value count is ridiculous, then force it to -1 (a sentinel value) */
670 if ( r->value_cnt < 0 ||
671 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
674 r->compressed = hdr.compress;
/* Convert the header index to the 0-based form kept in the reader; a
   header value of 0 therefore becomes -1, meaning no weighting. */
676 r->weight_idx = hdr.weight_idx - 1;
678 r->case_cnt = hdr.case_cnt;
679 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
681 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
682 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
685 if (r->bias != 100.0)
686 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
688 handle_get_filename (r->fh), r->bias);
690 /* Make a file label only on the condition that the given label is
691 not all spaces or nulls. */
/* Scan backward for the last byte that is neither whitespace nor NUL
   and copy everything up to and including it. */
695 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
696 if (!isspace ((unsigned char) hdr.file_label[i])
697 && hdr.file_label[i] != 0)
699 char *label = xmalloc (i + 2);
700 memcpy (label, hdr.file_label, i + 1);
702 dict_set_label (dict, label);
712 memcpy (info->creation_date, hdr.creation_date, 9);
713 info->creation_date[9] = 0;
715 memcpy (info->creation_time, hdr.creation_time, 8);
716 info->creation_time[8] = 0;
718 #ifdef WORDS_BIGENDIAN
719 info->big_endian = !r->reverse_endian;
721 info->big_endian = r->reverse_endian;
724 info->compressed = hdr.compress;
726 info->case_cnt = hdr.case_cnt;
/* Skip leading non-graphic characters after the recognized prefix.
   NOTE(review): strcpy is unbounded; the size of info->product is
   declared outside this file -- confirm it can hold the product name. */
728 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
729 if (isgraph ((unsigned char) *cp))
731 strcpy (info->product, cp);
740 /* Reads most of the dictionary from file H; also fills in the
741 associated VAR_BY_IDX array. */
/* Walks the variable records one value position at a time.  Reading
   stops when the header value count has been reached (if it was usable)
   or when a record of a type other than 2 is seen, in which case that
   record is pushed back with buf_unread.  Each position yields either
   a new dictionary variable or, for the continuation of a long string,
   a NULL slot in *VAR_BY_IDX and a width of -1 in r->vars.  Callers
   treat a zero return as failure; return statements, loop headers and
   braces are on lines not shown in this excerpt. */
743 read_variables (struct sfm_reader *r,
744 struct dictionary *dict, struct variable ***var_by_idx)
748 struct sysfile_variable sv; /* Disk buffer. */
749 int long_string_count = 0; /* # of long string continuation
750 records still expected. */
751 int next_value = 0; /* Index to next `value' structure. */
757 /* Pre-allocate variables. */
758 if ( r->value_cnt != -1 )
760 *var_by_idx = xmalloc(r->value_cnt * sizeof (**var_by_idx));
761 r->vars = xmalloc( r->value_cnt * sizeof (*r->vars) );
765 /* Read in the entry for each variable and use the info to
766 initialize the dictionary. */
770 char name[SHORT_NAME_LEN + 1];
774 if ( r->value_cnt != -1 && i >= r->value_cnt )
777 assertive_buf_read (r, &sv, sizeof sv, 0);
779 if (r->reverse_endian)
781 bswap_int32 (&sv.rec_type);
782 bswap_int32 (&sv.type);
783 bswap_int32 (&sv.has_var_label);
784 bswap_int32 (&sv.n_missing_values);
785 bswap_int32 (&sv.print);
786 bswap_int32 (&sv.write);
789 /* We've come to the end of the variable entries */
790 if (sv.rec_type != 2)
792 buf_unread(r, sizeof sv);
/* With an unknown value count the arrays were not pre-allocated, so
   grow them by one slot per record. */
797 if ( -1 == r->value_cnt )
799 *var_by_idx = xrealloc (*var_by_idx, sizeof **var_by_idx * (i + 1));
800 r->vars = xrealloc(r->vars, (i + 1) * sizeof (*r->vars) );
803 /* If there was a long string previously, make sure that the
804 continuations are present; otherwise make sure there aren't
806 if (long_string_count)
809 lose ((ME, _("%s: position %d: String variable does not have "
810 "proper number of continuation records."),
811 handle_get_filename (r->fh), i));
814 r->vars[i].width = -1;
815 (*var_by_idx)[i] = NULL;
819 else if (sv.type == -1)
820 lose ((ME, _("%s: position %d: Superfluous long string continuation "
822 handle_get_filename (r->fh), i));
824 /* Check fields for validity. */
825 if (sv.type < 0 || sv.type > 255)
826 lose ((ME, _("%s: position %d: Bad variable type code %d."),
827 handle_get_filename (r->fh), i, sv.type));
828 if (sv.has_var_label != 0 && sv.has_var_label != 1)
829 lose ((ME, _("%s: position %d: Variable label indicator field is not "
830 "0 or 1."), handle_get_filename (r->fh), i));
831 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
832 || sv.n_missing_values == -1)
833 lose ((ME, _("%s: position %d: Missing value indicator field is not "
834 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
836 /* Copy first character of variable name. */
837 if (!isalpha ((unsigned char) sv.name[0])
838 && sv.name[0] != '@' && sv.name[0] != '#')
839 lose ((ME, _("%s: position %d: Variable name begins with invalid "
841 handle_get_filename (r->fh), i));
842 if (islower ((unsigned char) sv.name[0]))
843 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
845 handle_get_filename (r->fh), i, sv.name[0]);
846 if (sv.name[0] == '#')
847 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
848 "(`#'). Scratch variables should not appear in system "
850 handle_get_filename (r->fh), i);
851 name[0] = toupper ((unsigned char) (sv.name[0]));
853 /* Copy remaining characters of variable name. */
854 for (j = 1; j < SHORT_NAME_LEN; j++)
856 int c = (unsigned char) sv.name[j];
860 else if (islower (c))
862 msg (MW, _("%s: position %d: Variable name character %d is "
863 "lowercase letter %c."),
864 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
865 name[j] = toupper ((unsigned char) (c));
867 else if (isalnum (c) || c == '.' || c == '@'
868 || c == '#' || c == '$' || c == '_')
871 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
873 handle_get_filename (r->fh), i, c, c));
877 if ( ! var_is_valid_name(name, false) )
878 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
879 handle_get_filename (r->fh), name));
881 /* Create variable. */
883 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
885 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
886 handle_get_filename (r->fh), name));
888 var_set_short_name (vv, vv->name);
890 /* Case reading data. */
/* sv.type is 0 for a numeric variable, otherwise the string width.  A
   string occupies one value per sizeof (flt64) bytes of width, rounded
   up; every value after the first is a continuation record. */
891 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
892 long_string_count = nv - 1;
895 /* Get variable label, if any. */
896 if (sv.has_var_label == 1)
901 /* Read length of label. */
902 assertive_buf_read (r, &len, sizeof len, 0);
903 if (r->reverse_endian)
907 if (len < 0 || len > 255)
908 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
910 handle_get_filename (r->fh), vv->name, len));
914 /* Read label into variable structure. */
/* The read length is LEN rounded up to a multiple of sizeof (int32);
   the minimum allocation of LEN + 1 keeps room for the terminator that
   is stored below. */
915 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
916 if (vv->label == NULL)
918 vv->label[len] = '\0';
922 /* Set missing values. */
923 if (sv.n_missing_values != 0)
927 if (vv->width > MAX_SHORT_STRING)
928 lose ((ME, _("%s: Long string variable %s may not have missing "
930 handle_get_filename (r->fh), vv->name));
932 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
934 if (r->reverse_endian && vv->type == NUMERIC)
935 for (j = 0; j < abs (sv.n_missing_values); j++)
936 bswap_flt64 (&mv[j]);
/* A positive count gives that many discrete missing values; the other
   branch handles a range (two values), optionally followed by one
   extra discrete value when the count is -3. */
938 if (sv.n_missing_values > 0)
940 vv->miss_type = sv.n_missing_values;
941 if (vv->type == NUMERIC)
942 for (j = 0; j < sv.n_missing_values; j++)
943 vv->missing[j].f = mv[j];
945 for (j = 0; j < sv.n_missing_values; j++)
946 memcpy (vv->missing[j].s, &mv[j], vv->width);
952 if (vv->type == ALPHA)
953 lose ((ME, _("%s: String variable %s may not have missing "
954 "values specified as a range."),
955 handle_get_filename (r->fh), vv->name));
/* A range whose low end equals the file LOWEST constant, or whose high
   end equals the file HIGHEST constant, is stored as a one-sided range
   holding only the other bound. */
957 if (mv[0] == r->lowest)
959 vv->miss_type = MISSING_LOW;
960 vv->missing[x++].f = mv[1];
962 else if (mv[1] == r->highest)
964 vv->miss_type = MISSING_HIGH;
965 vv->missing[x++].f = mv[0];
969 vv->miss_type = MISSING_RANGE;
970 vv->missing[x++].f = mv[0];
971 vv->missing[x++].f = mv[1];
974 if (sv.n_missing_values == -3)
977 vv->missing[x++].f = mv[2];
982 vv->miss_type = MISSING_NONE;
984 if (!parse_format_spec (r, sv.print, &vv->print, vv)
985 || !parse_format_spec (r, sv.write, &vv->write, vv))
988 r->vars[i].width = vv->width;
989 r->vars[i].fv = vv->fv;
993 /* Some consistency checks. */
994 if (long_string_count != 0)
995 lose ((ME, _("%s: Long string continuation records omitted at end of "
997 handle_get_filename (r->fh)));
999 if (next_value != r->value_cnt)
1000 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
1001 "%d were read from file."),
1002 handle_get_filename (r->fh), r->value_cnt, next_value);
/* Decodes the packed format word S into *F for variable V.  In the
   visible code the format type is taken from bits 16-23 of S and the
   width from bits 8-15.  A type that does not match the variable class
   (string versus numeric) is an error.  A specifier that fails
   check_output_specifier or check_specifier_width is reported and then
   replaced by a default: f8_2 for numeric variables, or an A format of
   the variable width for strings.  Callers treat a zero return as
   failure; return statements are on lines not shown in this excerpt. */
1011 /* Translates the format spec from sysfile format to internal
1014 parse_format_spec (struct sfm_reader *r, int32 s,
1015 struct fmt_spec *f, struct variable *v)
1017 f->type = translate_fmt ((s >> 16) & 0xff);
1019 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1020 handle_get_filename (r->fh), (s >> 16) & 0xff));
1021 f->w = (s >> 8) & 0xff;
1024 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1025 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1026 handle_get_filename (r->fh),
1027 v->type == ALPHA ? _("String") : _("Numeric"),
1029 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1030 formats[f->type].name));
1032 if (!check_output_specifier (f, false)
1033 || !check_specifier_width (f, v->width, false))
1035 msg (ME, _("%s variable %s has invalid format specifier %s."),
1036 v->type == NUMERIC ? _("Numeric") : _("String"),
1037 v->name, fmt_to_string (f));
1038 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1046 /* Reads value labels from sysfile H and inserts them into the
1047 associated dictionary. */
/* Works in two steps.  First the type 3 record is read into a
   temporary array of raw 8-byte values with their label strings.  Then
   the type 4 record that must follow is read to learn which variables
   the labels apply to; once the variable type is known the raw values
   are converted and attached to each variable with val_labs_replace.
   VAR_BY_IDX maps 0-based value positions to variables, with NULL for
   long string continuations.  Both exit paths free the label strings.
   Callers treat a zero return as failure; return statements, labels
   and braces are on lines not shown in this excerpt. */
1049 read_value_labels (struct sfm_reader *r,
1050 struct dictionary *dict, struct variable **var_by_idx)
1054 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
1055 union value value; /* Value. */
1056 char *label; /* Null-terminated label string. */
1059 struct label *labels = NULL;
1060 int32 n_labels; /* Number of labels. */
1062 struct variable **var = NULL; /* Associated variables. */
1063 int32 n_vars; /* Number of associated variables. */
1067 /* First step: read the contents of the type 3 record and record its
1068 contents. Note that we can't do much with the data since we
1069 don't know yet whether it is of numeric or string type. */
1071 /* Read number of labels. */
1072 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1073 if (r->reverse_endian)
1074 bswap_int32 (&n_labels);
/* NOTE(review): the division by sizeof makes this comparison unsigned,
   so the all-ones int32 becomes the largest size_t value and a negative
   n_labels becomes a huge one.  The test therefore rejects negative
   counts, but on platforms where size_t is wider than int32 it places
   no practical upper bound on positive counts -- confirm the intent. */
1076 if ( n_labels >= ((int32) ~0) / sizeof *labels)
1078 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1079 handle_get_filename (r->fh), n_labels);
1083 /* Allocate memory. */
1084 labels = xcalloc (n_labels , sizeof *labels);
1085 for (i = 0; i < n_labels; i++)
1086 labels[i].label = NULL;
1088 /* Read each value/label tuple into labels[]. */
1089 for (i = 0; i < n_labels; i++)
1091 struct label *label = labels + i;
1092 unsigned char label_len;
1096 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1098 /* Read label length. */
1099 assertive_buf_read (r, &label_len, sizeof label_len, 0);
/* The length byte plus the label text is padded to a multiple of
   sizeof (flt64); the length byte has already been consumed, hence the
   padded_len - 1 bytes read below. */
1100 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1102 /* Read label, padding. */
1103 label->label = xmalloc (padded_len + 1);
1104 assertive_buf_read (r, label->label, padded_len - 1, 0);
1105 label->label[label_len] = 0;
1108 /* Second step: Read the type 4 record that has the list of
1109 variables to which the value labels are to be applied. */
1111 /* Read record type of type 4 record. */
1115 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1116 if (r->reverse_endian)
1117 bswap_int32 (&rec_type);
1120 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1121 "follow value label record (type 3) as it should."),
1122 handle_get_filename (r->fh)));
1125 /* Read number of variables associated with value label from type 4
1127 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1128 if (r->reverse_endian)
1129 bswap_int32 (&n_vars);
1130 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1131 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1132 "is not between 1 and the number of variables (%d)."),
1133 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1135 /* Read the list of variables. */
1136 var = xmalloc (n_vars * sizeof *var);
1137 for (i = 0; i < n_vars; i++)
1142 /* Read variable index, check range. */
1143 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1144 if (r->reverse_endian)
1145 bswap_int32 (&var_idx);
1146 if (var_idx < 1 || var_idx > r->value_cnt)
1147 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1148 "not between 1 and the number of values (%d)."),
1149 handle_get_filename (r->fh), var_idx, r->value_cnt));
1151 /* Make sure it's a real variable. */
1152 v = var_by_idx[var_idx - 1];
1154 lose ((ME, _("%s: Variable index associated with value label (%d) "
1155 "refers to a continuation of a string variable, not to "
1156 "an actual variable."),
1157 handle_get_filename (r->fh), var_idx));
1158 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1159 lose ((ME, _("%s: Value labels are not allowed on long string "
1161 handle_get_filename (r->fh), v->name));
1163 /* Add it to the list of variables. */
1167 /* Type check the variables. */
1168 for (i = 1; i < n_vars; i++)
1169 if (var[i]->type != var[0]->type)
1170 lose ((ME, _("%s: Variables associated with value label are not all of "
1171 "identical type. Variable %s has %s type, but variable "
1173 handle_get_filename (r->fh),
1174 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1175 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1177 /* Fill in labels[].value, now that we know the desired type. */
1178 for (i = 0; i < n_labels; i++)
1180 struct label *label = labels + i;
1182 if (var[0]->type == ALPHA)
/* NOTE(review): label->label is a char pointer, so sizeof (label->label)
   is the size of a pointer, not of a string buffer.  Where pointers are
   4 bytes only 4 of the 8 raw bytes would be copied; the size of
   label->value.s looks like the intended bound -- confirm. */
1184 const int copy_len = min (sizeof (label->raw_value),
1185 sizeof (label->label));
1186 memcpy (label->value.s, label->raw_value, copy_len);
1189 assert (sizeof f == sizeof label->raw_value);
1190 memcpy (&f, label->raw_value, sizeof f);
1191 if (r->reverse_endian)
1197 /* Assign the value_label's to each variable. */
1198 for (i = 0; i < n_vars; i++)
1200 struct variable *v = var[i];
1203 /* Add each label to the variable. */
1204 for (j = 0; j < n_labels; j++)
1206 struct label *label = labels + j;
1207 if (!val_labs_replace (v->val_labs, label->value, label->label))
1210 if (var[0]->type == NUMERIC)
1211 msg (MW, _("%s: File contains duplicate label for value %g for "
1213 handle_get_filename (r->fh), label->value.f, v->name);
1215 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1216 "for variable %s."),
1217 handle_get_filename (r->fh), v->width, label->value.s, v->name);
1221 for (i = 0; i < n_labels; i++)
1222 free (labels[i].label);
1230 for (i = 0; i < n_labels; i++)
1231 free (labels[i].label);
/* Notes on buf_read, from the visible code: a buffer is allocated only
   when BUF is null and BYTE_CNT is positive, sized to the larger of
   BYTE_CNT and MIN_ALLOC.  A zero BYTE_CNT is special-cased before the
   read.  The data is requested from fread as a single item of BYTE_CNT
   bytes, so a short read counts as failure; ferror then distinguishes
   an I/O error from an unexpected end of file.  Whether a buffer that
   was allocated here is released on failure is on lines not shown in
   this excerpt. */
1238 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1239 non-NULL, uses that as the buffer; otherwise allocates at least
1240 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1243 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1247 if (buf == NULL && byte_cnt > 0 )
1248 buf = xmalloc (max (byte_cnt, min_alloc));
1250 if ( byte_cnt == 0 )
1254 if (1 != fread (buf, byte_cnt, 1, r->file))
1256 if (ferror (r->file))
1257 msg (ME, _("%s: Reading system file: %s."),
1258 handle_get_filename (r->fh), strerror (errno));
1260 corrupt_msg (ME, _("%s: Unexpected end of file."),
1261 handle_get_filename (r->fh));
1267 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
/* Used by read_variables to push back a record that turned out not to
   be a variable record.  A seek failure is reported with msg but is not
   signalled to the caller (the function returns void).
   NOTE(review): BYTE_CNT is a size_t, so the negation below is done in
   unsigned arithmetic and relies on the conversion to the long offset
   of fseek yielding the intended negative value; negating after a cast
   to long would be clearer. */
1269 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1271 assert(byte_cnt > 0);
1273 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1275 msg (ME, _("%s: Seeking system file: %s."),
1276 handle_get_filename (r->fh), strerror (errno));
/* Notes on read_documents, from the visible code: a second document
   record in the same file is an error.  The record holds a line count
   followed by that many 80-byte lines, which are read into one buffer
   with room for a terminating null byte and handed to
   dict_set_documents.
   NOTE(review): no bswap_int32 call on line_cnt is visible in this
   excerpt, unlike the other int32 fields read in this file; confirm
   that reverse-endian files are handled.  The product 80 * line_cnt is
   also not visibly checked for overflow. */
1280 /* Reads a document record, type 6, from system file R, and sets up
1281 the documents and n_documents fields in the associated
1284 read_documents (struct sfm_reader *r, struct dictionary *dict)
1289 if (dict_get_documents (dict) != NULL)
1290 lose ((ME, _("%s: System file contains multiple "
1291 "type 6 (document) records."),
1292 handle_get_filename (r->fh)));
1294 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1296 lose ((ME, _("%s: Number of document lines (%ld) "
1297 "must be greater than 0."),
1298 handle_get_filename (r->fh), (long) line_cnt));
1300 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1301 /* FIXME? Run through asciify. */
1302 if (documents == NULL)
1304 documents[80 * line_cnt] = '\0';
1305 dict_set_documents (dict, documents);
/* Notes on buffer_input, from the visible code: the decompression
   buffer holds 128 flt64 elements and is refilled with a single fread
   call; r->end is set just past the last element actually read.  A
   stream error is reported with msg.
   NOTE(review): the xmalloc call is presumably guarded by a check that
   r->buf is still null, and r->ptr is presumably reset to r->buf, on
   lines not shown in this excerpt -- confirm. */
1315 /* Reads compressed data into H->BUF and sets other pointers
1316 appropriately. Returns nonzero only if both no errors occur and
1319 buffer_input (struct sfm_reader *r)
1324 r->buf = xmalloc (sizeof *r->buf * 128);
1325 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1326 if (ferror (r->file))
1328 msg (ME, _("%s: Error reading file: %s."),
1329 handle_get_filename (r->fh), strerror (errno));
1333 r->end = &r->buf[amt];
1337 /* Reads a single case consisting of compressed data from system
1338 file H into the array BUF[] according to reader R, and
1339 returns nonzero only if successful. */
1340 /* Data in system files is compressed in this manner. Data
1341 values are grouped into sets of eight ("octets"). Each value
1342 in an octet has one instruction byte that are output together.
1343 Each instruction byte gives a value for that byte or indicates
1344 that the value can be found following the instructions. */
/* Decoding state lives in the reader: r->x holds the current eight
   instruction bytes and r->y the position to resume from, which is
   where this function starts; r->ptr and r->end delimit the buffered
   raw data and are refilled through buffer_input.  BUF must have room
   for r->value_cnt elements.  Presumably a case may end in the middle
   of an instruction octet, which is why the position is kept between
   calls -- confirm against the lines not shown in this excerpt (switch
   labels, state updates and return statements). */
1346 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1348 const unsigned char *p_end = r->x + sizeof (flt64);
1349 unsigned char *p = r->y;
1351 const flt64 *buf_beg = buf;
1352 const flt64 *buf_end = &buf[r->value_cnt];
1356 for (; p < p_end; p++){
1360 /* Code 0 is ignored. */
1363 /* Code 252 is end of file. */
1365 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1366 "in partial case."),
1367 handle_get_filename (r->fh)));
1370 /* Code 253 indicates that the value is stored explicitly
1371 following the instruction bytes. */
1372 if (r->ptr == NULL || r->ptr >= r->end)
1373 if (!buffer_input (r))
1375 lose ((ME, _("%s: Unexpected end of file."),
1376 handle_get_filename (r->fh)));
1379 memcpy (buf++, r->ptr++, sizeof *buf);
1384 /* Code 254 indicates a string that is all blanks. */
1385 memset (buf++, ' ', sizeof *buf);
1390 /* Code 255 indicates the system-missing value. */
1392 if (r->reverse_endian)
1399 /* Codes 1 through 251 inclusive are taken to indicate a
1400 value of (BYTE - BIAS), where BYTE is the byte's value
1401 and BIAS is the compression bias (generally 100.0). */
1402 *buf = *p - r->bias;
1403 if (r->reverse_endian)
1411 /* We have reached the end of this instruction octet. Read
1413 if (r->ptr == NULL || r->ptr >= r->end)
1414 if (!buffer_input (r))
1417 lose ((ME, _("%s: Unexpected end of file."),
1418 handle_get_filename (r->fh)));
1421 memcpy (r->x, r->ptr++, sizeof *buf);
1429 /* We have filled up an entire record. Update state and return
1435 /* We have been unsuccessful at filling a record, either through i/o
1436 error or through an end-of-file indication. Update state and
1437 return unsuccessfully. */
1441 /* Reads one case from READER's file into C. Returns nonzero
1442 only if successful. */
/* Two strategies are used.  When the data is uncompressed and flt64
   has the size of double, the case is read straight into the storage of
   C and then patched in place: numeric values (width 0) are byte
   swapped for reverse-endian files, and the file system-missing value
   is mapped to SYSMIS.  Otherwise the case is read, or decompressed,
   into a temporary bounce buffer and copied value by value; positions
   with width -1 (long string continuations) are skipped because the
   string copy already covered them.
   NOTE(review): in the fast path the SYSMIS fix-up tests
   case_num (c, i) but stores through r->vars[i].fv; every other access
   in this function uses the fv index -- confirm the two always agree. */
1444 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1446 if (!r->compressed && sizeof (flt64) == sizeof (double))
1448 /* Fast path: external and internal representations are the
1449 same, except possibly for endianness or SYSMIS. Read
1450 directly into the case's buffer, then fix up any minor
1451 details as needed. */
1452 if (!fread_ok (r, case_data_all_rw (c),
1453 sizeof (union value) * r->value_cnt))
1456 /* Fix up endianness if needed. */
1457 if (r->reverse_endian)
1461 for (i = 0; i < r->value_cnt; i++)
1462 if (r->vars[i].width == 0)
1463 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1466 /* Fix up SYSMIS values if needed.
1467 I don't think this will ever actually kick in, but it
1469 if (r->sysmis != SYSMIS)
1473 for (i = 0; i < r->value_cnt; i++)
1474 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1475 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1480 /* Slow path: internal and external representations differ.
1481 Read into a bounce buffer, then copy to C. */
1488 bounce_size = sizeof *bounce * r->value_cnt;
1489 bounce = bounce_cur = local_alloc (bounce_size);
1492 read_ok = fread_ok (r, bounce, bounce_size);
1494 read_ok = read_compressed_data (r, bounce);
1497 local_free (bounce);
1501 for (i = 0; i < r->value_cnt; i++)
1503 struct sfm_var *v = &r->vars[i];
1507 flt64 f = *bounce_cur++;
1508 if (r->reverse_endian)
1510 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1512 else if (v->width != -1)
1514 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1515 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1519 local_free (bounce);
/* Reads BYTE_CNT bytes from the reader stream into BUFFER and compares
   the number of bytes obtained with the number requested.  On a short
   read a stream error is reported with strerror (errno); otherwise a
   partial record message is issued if some but not all bytes arrived.
   A read that returns no bytes at all without a stream error produces
   no message, which lets callers treat a clean end of file quietly.
   Callers test the result with the ! operator; the return statements
   are on lines not shown in this excerpt. */
1525 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1527 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1529 if (read_bytes == byte_cnt)
1533 if (ferror (r->file))
1534 msg (ME, _("%s: Reading system file: %s."),
1535 handle_get_filename (r->fh), strerror (errno));
1536 else if (read_bytes != 0)
1537 msg (ME, _("%s: Partial record at end of system file."),
1538 handle_get_filename (r->fh));