1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
44 #include "debug-print.h"
46 /* System file reader. */
49 struct file_handle *fh; /* File handle. */
50 FILE *file; /* File stream. */
52 int reverse_endian; /* 1=file has endianness opposite us. */
53 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
54 int value_cnt; /* Number of `union values's per case. */
55 long case_cnt; /* Number of cases, -1 if unknown. */
56 int compressed; /* 1=compressed, 0=not compressed. */
57 double bias; /* Compression bias, usually 100.0. */
58 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 struct sfm_var *vars; /* Variables. */
63 /* File's special constants. */
68 /* Decompression buffer. */
69 flt64 *buf; /* Buffer data. */
70 flt64 *ptr; /* Current location in buffer. */
71 flt64 *end; /* End of buffer data. */
73 /* Compression instruction octet. */
74 unsigned char x[8]; /* Current instruction octet. */
75 unsigned char *y; /* Location in current instruction octet. */
78 /* A variable in a system file. */
81 int width; /* 0=numeric, otherwise string width. */
82 int fv; /* Index into case. */
87 /* Swap bytes *A and *B. */
89 bswap (unsigned char *a, unsigned char *b)
96 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
98 bswap_int32 (int32 *x_)
100 unsigned char *x = (unsigned char *) x_;
101 bswap (x + 0, x + 3);
102 bswap (x + 1, x + 2);
105 /* Reverse the byte order of 64-bit floating point *X. */
107 bswap_flt64 (flt64 *x_)
109 unsigned char *x = (unsigned char *) x_;
110 bswap (x + 0, x + 7);
111 bswap (x + 1, x + 6);
112 bswap (x + 2, x + 5);
113 bswap (x + 3, x + 4);
117 corrupt_msg (int class, const char *format,...)
118 PRINTF_FORMAT (2, 3);
120 /* Displays a corrupt sysfile error. */
122 corrupt_msg (int class, const char *format,...)
128 getl_location (&e.where.filename, &e.where.line_number);
129 e.title = _("corrupt system file: ");
131 va_start (args, format);
132 err_vmsg (&e, format, args);
136 /* Closes a system file after we're done with it. */
138 sfm_close_reader (struct sfm_reader *r)
144 fh_close (r->fh, "system file", "rs");
147 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
148 msg (ME, _("%s: Closing system file: %s."),
149 handle_get_filename (r->fh), strerror (errno));
157 /* Dictionary reader. */
159 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
161 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
164 static int read_header (struct sfm_reader *,
165 struct dictionary *, struct sfm_read_info *);
166 static int parse_format_spec (struct sfm_reader *, int32,
167 struct fmt_spec *, struct variable *);
168 static int read_value_labels (struct sfm_reader *, struct dictionary *,
169 struct variable **var_by_idx);
170 static int read_variables (struct sfm_reader *,
171 struct dictionary *, struct variable ***var_by_idx);
172 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
173 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
174 static int read_documents (struct sfm_reader *, struct dictionary *);
176 static int fread_ok (struct sfm_reader *, void *, size_t);
178 /* Displays the message X with corrupt_msg, then jumps to the error label. */
186 /* Calls buf_read with the specified arguments, and jumps to
187 error if the read fails. */
188 #define assertive_buf_read(a,b,c,d) \
190 if (!buf_read (a,b,c,d)) \
194 /* Opens the system file designated by file handle FH for
195 reading. Reads the system file's dictionary into *DICT.
196 If INFO is non-null, then it receives additional info about the system file. */
199 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
200 struct sfm_read_info *info)
202 struct sfm_reader *r = NULL;
203 struct variable **var_by_idx = NULL;
205 *dict = dict_create ();
206 if (!fh_open (fh, "system file", "rs"))
209 /* Create and initialize reader. */
210 r = xmalloc (sizeof *r);
212 r->file = fn_open (handle_get_filename (fh), "rb");
214 r->reverse_endian = 0;
224 r->sysmis = -FLT64_MAX;
225 r->highest = FLT64_MAX;
226 r->lowest = second_lowest_flt64;
228 r->buf = r->ptr = r->end = NULL;
229 r->y = r->x + sizeof r->x;
231 /* Check that file open succeeded. */
234 msg (ME, _("An error occurred while opening \"%s\" for reading "
235 "as a system file: %s."),
236 handle_get_filename (r->fh), strerror (errno));
241 /* Read header and variables. */
242 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
246 /* Handle weighting. */
247 if (r->weight_idx != -1)
249 struct variable *weight_var;
251 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
252 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
253 "and number of elements per case (%d)."),
254 handle_get_filename (r->fh), r->weight_idx, r->value_cnt));
257 weight_var = var_by_idx[r->weight_idx];
259 if (weight_var == NULL)
261 _("%s: Weighting variable may not be a continuation of "
262 "a long string variable."), handle_get_filename (fh)));
263 else if (weight_var->type == ALPHA)
264 lose ((ME, _("%s: Weighting variable may not be a string variable."),
265 handle_get_filename (fh)));
267 dict_set_weight (*dict, weight_var);
270 dict_set_weight (*dict, NULL);
272 /* Read records of types 3, 4, 6, and 7. */
277 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
278 if (r->reverse_endian)
279 bswap_int32 (&rec_type);
284 if (!read_value_labels (r, *dict, var_by_idx))
289 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
290 "records must always immediately follow type 3 "
292 handle_get_filename (r->fh)));
295 if (!read_documents (r, *dict))
312 assertive_buf_read (r, &data, sizeof data, 0);
313 if (r->reverse_endian)
315 bswap_int32 (&data.subtype);
316 bswap_int32 (&data.size);
317 bswap_int32 (&data.count);
319 bytes = data.size * data.count;
320 if (bytes < data.size || bytes < data.count)
321 lose ((ME, "%s: Record type %d subtype %d too large.",
322 handle_get_filename (r->fh), rec_type, data.subtype));
324 switch (data.subtype)
327 if (!read_machine_int32_info (r, data.size, data.count))
332 if (!read_machine_flt64_info (r, data.size, data.count))
337 case 6: /* ?? Used by SPSS 8.0. */
341 case 11: /* Variable display parameters */
343 const int n_vars = data.count / 3 ;
345 if ( data.count % 3 )
347 msg (MW, _("%s: Invalid subrecord length. "
348 "Record: 7; Subrecord: 11"),
349 handle_get_filename (r->fh));
353 for ( i = 0 ; i < n_vars ; ++i )
365 assertive_buf_read (r, ¶ms, sizeof(params), 0);
367 v = dict_get_var(*dict, i);
369 v->measure = params.measure;
370 v->display_width = params.width;
371 v->alignment = params.align;
376 case 13: /* SPSS 12.0 Long variable name map */
378 char *buf, *short_name, *save_ptr;
382 buf = xmalloc (bytes + 1);
383 if (!buf_read (r, buf, bytes, 0))
391 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
393 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
395 char *long_name = strtok_r (NULL, "\t", &save_ptr);
398 /* Validate long name. */
399 if (long_name == NULL)
401 msg (MW, _("%s: Trailing garbage in long variable "
403 handle_get_filename (r->fh));
406 if (!var_is_valid_name (long_name, false))
408 msg (MW, _("%s: Long variable mapping to invalid "
409 "variable name `%s'."),
410 handle_get_filename (r->fh), long_name);
414 /* Find variable using short name. */
415 v = dict_lookup_var (*dict, short_name);
418 msg (MW, _("%s: Long variable mapping for "
419 "nonexistent variable %s."),
420 handle_get_filename (r->fh), short_name);
425 /* Renaming a variable may clear the short
426 name, but we want to retain it, so
427 re-set it explicitly. */
428 dict_rename_var (*dict, v, long_name);
429 var_set_short_name (v, short_name);
431 /* For compatibility, make sure dictionary
432 is in long variable name map order. In
433 the common case, this has no effect,
434 because the dictionary and the long
435 variable name map are already in the same order. */
437 dict_reorder_var (*dict, v, idx);
446 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
447 "encountered in system file."),
448 handle_get_filename (r->fh), data.subtype);
454 void *x = buf_read (r, NULL, data.size * data.count, 0);
466 assertive_buf_read (r, &filler, sizeof filler, 0);
471 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
472 handle_get_filename (r->fh), rec_type);
477 /* Come here on successful completion. */
482 /* Come here on unsuccessful completion. */
483 sfm_close_reader (r);
487 dict_destroy (*dict);
493 /* Read record type 7, subtype 3. */
495 read_machine_int32_info (struct sfm_reader *r, int size, int count)
502 if (size != sizeof (int32) || count != 8)
503 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
504 "subtype 3. Expected size %d, count 8."),
505 handle_get_filename (r->fh), size, count, sizeof (int32)));
507 assertive_buf_read (r, data, sizeof data, 0);
508 if (r->reverse_endian)
509 for (i = 0; i < 8; i++)
510 bswap_int32 (&data[i]);
514 lose ((ME, _("%s: Floating-point representation in system file is not "
515 "IEEE-754. PSPP cannot convert between floating-point "
517 handle_get_filename (r->fh)));
519 #error Add support for your floating-point format.
522 #ifdef WORDS_BIGENDIAN
527 if (r->reverse_endian)
529 if (file_bigendian ^ (data[6] == 1))
530 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
531 "endianness intuited from file header (%s)."),
532 handle_get_filename (r->fh),
533 file_bigendian ? _("big-endian") : _("little-endian"),
534 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
537 /* PORTME: Character representation code. */
538 if (data[7] != 2 && data[7] != 3)
539 lose ((ME, _("%s: File-indicated character representation code (%s) is "
541 handle_get_filename (r->fh),
542 (data[7] == 1 ? "EBCDIC"
543 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
551 /* Read record type 7, subtype 4. */
553 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
558 if (size != sizeof (flt64) || count != 3)
559 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
560 "subtype 4. Expected size %d, count 8."),
561 handle_get_filename (r->fh), size, count, sizeof (flt64)));
563 assertive_buf_read (r, data, sizeof data, 0);
564 if (r->reverse_endian)
565 for (i = 0; i < 3; i++)
566 bswap_flt64 (&data[i]);
568 if (data[0] != SYSMIS || data[1] != FLT64_MAX
569 || data[2] != second_lowest_flt64)
572 r->highest = data[1];
574 msg (MW, _("%s: File-indicated value is different from internal value "
575 "for at least one of the three system values. SYSMIS: "
576 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
578 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
579 (double) data[1], (double) FLT64_MAX,
580 (double) data[2], (double) second_lowest_flt64);
590 read_header (struct sfm_reader *r,
591 struct dictionary *dict, struct sfm_read_info *info)
593 struct sysfile_header hdr; /* Disk buffer. */
594 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
595 int skip_amt = 0; /* Amount of product name to omit. */
598 /* Read header, check magic. */
599 assertive_buf_read (r, &hdr, sizeof hdr, 0);
600 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
601 lose ((ME, _("%s: Bad magic. Proper system files begin with "
602 "the four characters `$FL2'. This file will not be read."),
603 handle_get_filename (r->fh)));
605 /* Check eye-catcher string. */
606 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
607 for (i = 0; i < 60; i++)
608 if (!isprint ((unsigned char) prod_name[i]))
610 for (i = 59; i >= 0; i--)
611 if (!isgraph ((unsigned char) prod_name[i]))
616 prod_name[60] = '\0';
620 static const char *prefix[N_PREFIXES] =
622 "@(#) SPSS DATA FILE",
628 for (i = 0; i < N_PREFIXES; i++)
629 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
631 skip_amt = strlen (prefix[i]);
636 /* Check endianness. */
637 if (hdr.layout_code == 2)
638 r->reverse_endian = 0;
641 bswap_int32 (&hdr.layout_code);
642 if (hdr.layout_code != 2)
643 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
644 "should be 2, in big-endian or little-endian format."),
645 handle_get_filename (r->fh), hdr.layout_code));
647 r->reverse_endian = 1;
648 bswap_int32 (&hdr.case_size);
649 bswap_int32 (&hdr.compress);
650 bswap_int32 (&hdr.weight_idx);
651 bswap_int32 (&hdr.case_cnt);
652 bswap_flt64 (&hdr.bias);
656 /* Copy basic info and verify correctness. */
657 r->value_cnt = hdr.case_size;
659 /* If value count is ridiculous, then force it to -1 (a sentinel value) */
660 if ( r->value_cnt < 0 ||
661 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
664 r->compressed = hdr.compress;
666 r->weight_idx = hdr.weight_idx - 1;
668 r->case_cnt = hdr.case_cnt;
669 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
671 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
672 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
675 if (r->bias != 100.0)
676 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
678 handle_get_filename (r->fh), r->bias);
680 /* Make a file label only on the condition that the given label is
681 not all spaces or nulls. */
685 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
686 if (!isspace ((unsigned char) hdr.file_label[i])
687 && hdr.file_label[i] != 0)
689 char *label = xmalloc (i + 2);
690 memcpy (label, hdr.file_label, i + 1);
692 dict_set_label (dict, label);
702 memcpy (info->creation_date, hdr.creation_date, 9);
703 info->creation_date[9] = 0;
705 memcpy (info->creation_time, hdr.creation_time, 8);
706 info->creation_time[8] = 0;
708 #ifdef WORDS_BIGENDIAN
709 info->big_endian = !r->reverse_endian;
711 info->big_endian = r->reverse_endian;
714 info->compressed = hdr.compress;
716 info->case_cnt = hdr.case_cnt;
718 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
719 if (isgraph ((unsigned char) *cp))
721 strcpy (info->product, cp);
730 /* Reads most of the dictionary from file H; also fills in the
731 associated VAR_BY_IDX array. */
733 read_variables (struct sfm_reader *r,
734 struct dictionary *dict, struct variable ***var_by_idx)
738 struct sysfile_variable sv; /* Disk buffer. */
739 int long_string_count = 0; /* # of long string continuation
740 records still expected. */
741 int next_value = 0; /* Index to next `value' structure. */
747 /* Pre-allocate variables. */
748 if ( r->value_cnt != -1 )
750 *var_by_idx = xmalloc(r->value_cnt * sizeof (**var_by_idx));
751 r->vars = xmalloc( r->value_cnt * sizeof (*r->vars) );
755 /* Read in the entry for each variable and use the info to
756 initialize the dictionary. */
760 char name[SHORT_NAME_LEN + 1];
764 if ( r->value_cnt != -1 && i >= r->value_cnt )
767 assertive_buf_read (r, &sv, sizeof sv, 0);
769 if (r->reverse_endian)
771 bswap_int32 (&sv.rec_type);
772 bswap_int32 (&sv.type);
773 bswap_int32 (&sv.has_var_label);
774 bswap_int32 (&sv.n_missing_values);
775 bswap_int32 (&sv.print);
776 bswap_int32 (&sv.write);
779 /* We've come to the end of the variable entries */
780 if (sv.rec_type != 2)
782 buf_unread(r, sizeof sv);
787 if ( -1 == r->value_cnt )
789 *var_by_idx = xrealloc (*var_by_idx, sizeof **var_by_idx * (i + 1));
790 r->vars = xrealloc(r->vars, (i + 1) * sizeof (*r->vars) );
793 /* If there was a long string previously, make sure that the
794 continuations are present; otherwise make sure there aren't any. */
796 if (long_string_count)
799 lose ((ME, _("%s: position %d: String variable does not have "
800 "proper number of continuation records."),
801 handle_get_filename (r->fh), i));
804 r->vars[i].width = -1;
805 (*var_by_idx)[i] = NULL;
809 else if (sv.type == -1)
810 lose ((ME, _("%s: position %d: Superfluous long string continuation "
812 handle_get_filename (r->fh), i));
814 /* Check fields for validity. */
815 if (sv.type < 0 || sv.type > 255)
816 lose ((ME, _("%s: position %d: Bad variable type code %d."),
817 handle_get_filename (r->fh), i, sv.type));
818 if (sv.has_var_label != 0 && sv.has_var_label != 1)
819 lose ((ME, _("%s: position %d: Variable label indicator field is not "
820 "0 or 1."), handle_get_filename (r->fh), i));
821 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
822 || sv.n_missing_values == -1)
823 lose ((ME, _("%s: position %d: Missing value indicator field is not "
824 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
826 /* Copy first character of variable name. */
827 if (!isalpha ((unsigned char) sv.name[0])
828 && sv.name[0] != '@' && sv.name[0] != '#')
829 lose ((ME, _("%s: position %d: Variable name begins with invalid "
831 handle_get_filename (r->fh), i));
832 if (islower ((unsigned char) sv.name[0]))
833 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
835 handle_get_filename (r->fh), i, sv.name[0]);
836 if (sv.name[0] == '#')
837 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
838 "(`#'). Scratch variables should not appear in system "
840 handle_get_filename (r->fh), i);
841 name[0] = toupper ((unsigned char) (sv.name[0]));
843 /* Copy remaining characters of variable name. */
844 for (j = 1; j < SHORT_NAME_LEN; j++)
846 int c = (unsigned char) sv.name[j];
850 else if (islower (c))
852 msg (MW, _("%s: position %d: Variable name character %d is "
853 "lowercase letter %c."),
854 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
855 name[j] = toupper ((unsigned char) (c));
857 else if (isalnum (c) || c == '.' || c == '@'
858 || c == '#' || c == '$' || c == '_')
861 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
863 handle_get_filename (r->fh), i, c, c));
867 /* Create variable. */
868 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
870 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
871 handle_get_filename (r->fh), name));
872 var_set_short_name (vv, vv->name);
874 /* Case reading data. */
875 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
876 long_string_count = nv - 1;
879 /* Get variable label, if any. */
880 if (sv.has_var_label == 1)
885 /* Read length of label. */
886 assertive_buf_read (r, &len, sizeof len, 0);
887 if (r->reverse_endian)
891 if (len < 0 || len > 255)
892 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
894 handle_get_filename (r->fh), vv->name, len));
898 /* Read label into variable structure. */
899 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
900 if (vv->label == NULL)
902 vv->label[len] = '\0';
906 /* Set missing values. */
907 if (sv.n_missing_values != 0)
911 if (vv->width > MAX_SHORT_STRING)
912 lose ((ME, _("%s: Long string variable %s may not have missing "
914 handle_get_filename (r->fh), vv->name));
916 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
918 if (r->reverse_endian && vv->type == NUMERIC)
919 for (j = 0; j < abs (sv.n_missing_values); j++)
920 bswap_flt64 (&mv[j]);
922 if (sv.n_missing_values > 0)
924 vv->miss_type = sv.n_missing_values;
925 if (vv->type == NUMERIC)
926 for (j = 0; j < sv.n_missing_values; j++)
927 vv->missing[j].f = mv[j];
929 for (j = 0; j < sv.n_missing_values; j++)
930 memcpy (vv->missing[j].s, &mv[j], vv->width);
936 if (vv->type == ALPHA)
937 lose ((ME, _("%s: String variable %s may not have missing "
938 "values specified as a range."),
939 handle_get_filename (r->fh), vv->name));
941 if (mv[0] == r->lowest)
943 vv->miss_type = MISSING_LOW;
944 vv->missing[x++].f = mv[1];
946 else if (mv[1] == r->highest)
948 vv->miss_type = MISSING_HIGH;
949 vv->missing[x++].f = mv[0];
953 vv->miss_type = MISSING_RANGE;
954 vv->missing[x++].f = mv[0];
955 vv->missing[x++].f = mv[1];
958 if (sv.n_missing_values == -3)
961 vv->missing[x++].f = mv[2];
966 vv->miss_type = MISSING_NONE;
968 if (!parse_format_spec (r, sv.print, &vv->print, vv)
969 || !parse_format_spec (r, sv.write, &vv->write, vv))
972 r->vars[i].width = vv->width;
973 r->vars[i].fv = vv->fv;
977 /* Some consistency checks. */
978 if (long_string_count != 0)
979 lose ((ME, _("%s: Long string continuation records omitted at end of "
981 handle_get_filename (r->fh)));
983 if (next_value != r->value_cnt)
984 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
985 "%d were read from file."),
986 handle_get_filename (r->fh), r->value_cnt, next_value);
995 /* Translates the format spec from sysfile format to internal format. */
998 parse_format_spec (struct sfm_reader *r, int32 s,
999 struct fmt_spec *f, struct variable *v)
1001 f->type = translate_fmt ((s >> 16) & 0xff);
1003 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1004 handle_get_filename (r->fh), (s >> 16) & 0xff));
1005 f->w = (s >> 8) & 0xff;
1008 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1009 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1010 handle_get_filename (r->fh),
1011 v->type == ALPHA ? _("String") : _("Numeric"),
1013 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1014 formats[f->type].name));
1016 if (!check_output_specifier (f, false)
1017 || !check_specifier_width (f, v->width, false))
1019 msg (ME, _("%s variable %s has invalid format specifier %s."),
1020 v->type == NUMERIC ? _("Numeric") : _("String"),
1021 v->name, fmt_to_string (f));
1022 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1030 /* Reads value labels from sysfile H and inserts them into the
1031 associated dictionary. */
1033 read_value_labels (struct sfm_reader *r,
1034 struct dictionary *dict, struct variable **var_by_idx)
1038 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
1039 union value value; /* Value. */
1040 char *label; /* Null-terminated label string. */
1043 struct label *labels = NULL;
1044 int32 n_labels; /* Number of labels. */
1046 struct variable **var = NULL; /* Associated variables. */
1047 int32 n_vars; /* Number of associated variables. */
1051 /* First step: read the contents of the type 3 record and record its
1052 contents. Note that we can't do much with the data since we
1053 don't know yet whether it is of numeric or string type. */
1055 /* Read number of labels. */
1056 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1057 if (r->reverse_endian)
1058 bswap_int32 (&n_labels);
1060 /* Allocate memory. */
1061 labels = xmalloc (n_labels * sizeof *labels);
1062 for (i = 0; i < n_labels; i++)
1063 labels[i].label = NULL;
1065 /* Read each value/label tuple into labels[]. */
1066 for (i = 0; i < n_labels; i++)
1068 struct label *label = labels + i;
1069 unsigned char label_len;
1073 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1075 /* Read label length. */
1076 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1077 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1079 /* Read label, padding. */
1080 label->label = xmalloc (padded_len + 1);
1081 assertive_buf_read (r, label->label, padded_len - 1, 0);
1082 label->label[label_len] = 0;
1085 /* Second step: Read the type 4 record that has the list of
1086 variables to which the value labels are to be applied. */
1088 /* Read record type of type 4 record. */
1092 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1093 if (r->reverse_endian)
1094 bswap_int32 (&rec_type);
1097 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1098 "follow value label record (type 3) as it should."),
1099 handle_get_filename (r->fh)));
1102 /* Read number of variables associated with value label from type 4 record. */
1104 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1105 if (r->reverse_endian)
1106 bswap_int32 (&n_vars);
1107 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1108 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1109 "is not between 1 and the number of variables (%d)."),
1110 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1112 /* Read the list of variables. */
1113 var = xmalloc (n_vars * sizeof *var);
1114 for (i = 0; i < n_vars; i++)
1119 /* Read variable index, check range. */
1120 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1121 if (r->reverse_endian)
1122 bswap_int32 (&var_idx);
1123 if (var_idx < 1 || var_idx > r->value_cnt)
1124 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1125 "not between 1 and the number of values (%d)."),
1126 handle_get_filename (r->fh), var_idx, r->value_cnt));
1128 /* Make sure it's a real variable. */
1129 v = var_by_idx[var_idx - 1];
1131 lose ((ME, _("%s: Variable index associated with value label (%d) "
1132 "refers to a continuation of a string variable, not to "
1133 "an actual variable."),
1134 handle_get_filename (r->fh), var_idx));
1135 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1136 lose ((ME, _("%s: Value labels are not allowed on long string "
1138 handle_get_filename (r->fh), v->name));
1140 /* Add it to the list of variables. */
1144 /* Type check the variables. */
1145 for (i = 1; i < n_vars; i++)
1146 if (var[i]->type != var[0]->type)
1147 lose ((ME, _("%s: Variables associated with value label are not all of "
1148 "identical type. Variable %s has %s type, but variable "
1150 handle_get_filename (r->fh),
1151 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1152 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1154 /* Fill in labels[].value, now that we know the desired type. */
1155 for (i = 0; i < n_labels; i++)
1157 struct label *label = labels + i;
1159 if (var[0]->type == ALPHA)
1161 const int copy_len = min (sizeof (label->raw_value),
1162 sizeof (label->label));
1163 memcpy (label->value.s, label->raw_value, copy_len);
1166 assert (sizeof f == sizeof label->raw_value);
1167 memcpy (&f, label->raw_value, sizeof f);
1168 if (r->reverse_endian)
1174 /* Assign the value_label's to each variable. */
1175 for (i = 0; i < n_vars; i++)
1177 struct variable *v = var[i];
1180 /* Add each label to the variable. */
1181 for (j = 0; j < n_labels; j++)
1183 struct label *label = labels + j;
1184 if (!val_labs_replace (v->val_labs, label->value, label->label))
1187 if (var[0]->type == NUMERIC)
1188 msg (MW, _("%s: File contains duplicate label for value %g for "
1190 handle_get_filename (r->fh), label->value.f, v->name);
1192 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1193 "for variable %s."),
1194 handle_get_filename (r->fh), v->width, label->value.s, v->name);
1198 for (i = 0; i < n_labels; i++)
1199 free (labels[i].label);
1207 for (i = 0; i < n_labels; i++)
1208 free (labels[i].label);
1215 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1216 non-NULL, uses that as the buffer; otherwise allocates at least
1217 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL on failure. */
1220 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1224 if (buf == NULL && byte_cnt > 0 )
1225 buf = xmalloc (max (byte_cnt, min_alloc));
1227 if ( byte_cnt == 0 )
1231 if (1 != fread (buf, byte_cnt, 1, r->file))
1233 if (ferror (r->file))
1234 msg (ME, _("%s: Reading system file: %s."),
1235 handle_get_filename (r->fh), strerror (errno));
1237 corrupt_msg (ME, _("%s: Unexpected end of file."),
1238 handle_get_filename (r->fh));
1244 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1246 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1248 assert(byte_cnt > 0);
1250 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1252 msg (ME, _("%s: Seeking system file: %s."),
1253 handle_get_filename (r->fh), strerror (errno));
1257 /* Reads a document record, type 6, from system file R, and sets up
1258 the documents and n_documents fields in the associated dictionary. */
1261 read_documents (struct sfm_reader *r, struct dictionary *dict)
1266 if (dict_get_documents (dict) != NULL)
1267 lose ((ME, _("%s: System file contains multiple "
1268 "type 6 (document) records."),
1269 handle_get_filename (r->fh)));
1271 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1273 lose ((ME, _("%s: Number of document lines (%ld) "
1274 "must be greater than 0."),
1275 handle_get_filename (r->fh), (long) line_cnt));
1277 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1278 /* FIXME? Run through asciify. */
1279 if (documents == NULL)
1281 documents[80 * line_cnt] = '\0';
1282 dict_set_documents (dict, documents);
1292 /* Reads compressed data into H->BUF and sets other pointers
1293 appropriately. Returns nonzero only if both no errors occur and data was read. */
1296 buffer_input (struct sfm_reader *r)
1301 r->buf = xmalloc (sizeof *r->buf * 128);
1302 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1303 if (ferror (r->file))
1305 msg (ME, _("%s: Error reading file: %s."),
1306 handle_get_filename (r->fh), strerror (errno));
1310 r->end = &r->buf[amt];
1314 /* Reads a single case consisting of compressed data from system
1315 file H into the array BUF[] according to reader R, and
1316 returns nonzero only if successful. */
1317 /* Data in system files is compressed in this manner. Data
1318 values are grouped into sets of eight ("octets"). Each value
1319 in an octet has one instruction byte that are output together.
1320 Each instruction byte gives a value for that byte or indicates
1321 that the value can be found following the instructions. */
1323 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1325 const unsigned char *p_end = r->x + sizeof (flt64);
1326 unsigned char *p = r->y;
1328 const flt64 *buf_beg = buf;
1329 const flt64 *buf_end = &buf[r->value_cnt];
1333 for (; p < p_end; p++){
1337 /* Code 0 is ignored. */
1340 /* Code 252 is end of file. */
1342 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1343 "in partial case."),
1344 handle_get_filename (r->fh)));
1347 /* Code 253 indicates that the value is stored explicitly
1348 following the instruction bytes. */
1349 if (r->ptr == NULL || r->ptr >= r->end)
1350 if (!buffer_input (r))
1352 lose ((ME, _("%s: Unexpected end of file."),
1353 handle_get_filename (r->fh)));
1356 memcpy (buf++, r->ptr++, sizeof *buf);
1361 /* Code 254 indicates a string that is all blanks. */
1362 memset (buf++, ' ', sizeof *buf);
1367 /* Code 255 indicates the system-missing value. */
1369 if (r->reverse_endian)
1376 /* Codes 1 through 251 inclusive are taken to indicate a
1377 value of (BYTE - BIAS), where BYTE is the byte's value
1378 and BIAS is the compression bias (generally 100.0). */
1379 *buf = *p - r->bias;
1380 if (r->reverse_endian)
1388 /* We have reached the end of this instruction octet. Read another. */
1390 if (r->ptr == NULL || r->ptr >= r->end)
1391 if (!buffer_input (r))
1394 lose ((ME, _("%s: Unexpected end of file."),
1395 handle_get_filename (r->fh)));
1398 memcpy (r->x, r->ptr++, sizeof *buf);
1406 /* We have filled up an entire record. Update state and return successfully. */
1412 /* We have been unsuccessful at filling a record, either through i/o
1413 error or through an end-of-file indication. Update state and
1414 return unsuccessfully. */
1418 /* Reads one case from READER's file into C. Returns nonzero
1419 only if successful. */
1421 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1423 if (!r->compressed && sizeof (flt64) == sizeof (double))
1425 /* Fast path: external and internal representations are the
1426 same, except possibly for endianness or SYSMIS. Read
1427 directly into the case's buffer, then fix up any minor
1428 details as needed. */
1429 if (!fread_ok (r, case_data_all_rw (c),
1430 sizeof (union value) * r->value_cnt))
1433 /* Fix up endianness if needed. */
1434 if (r->reverse_endian)
1438 for (i = 0; i < r->value_cnt; i++)
1439 if (r->vars[i].width == 0)
1440 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1443 /* Fix up SYSMIS values if needed.
1444 I don't think this will ever actually kick in, but it can't hurt. */
1446 if (r->sysmis != SYSMIS)
1450 for (i = 0; i < r->value_cnt; i++)
1451 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1452 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1457 /* Slow path: internal and external representations differ.
1458 Read into a bounce buffer, then copy to C. */
1465 bounce_size = sizeof *bounce * r->value_cnt;
1466 bounce = bounce_cur = local_alloc (bounce_size);
1469 read_ok = fread_ok (r, bounce, bounce_size);
1471 read_ok = read_compressed_data (r, bounce);
1474 local_free (bounce);
1478 for (i = 0; i < r->value_cnt; i++)
1480 struct sfm_var *v = &r->vars[i];
1484 flt64 f = *bounce_cur++;
1485 if (r->reverse_endian)
1487 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1489 else if (v->width != -1)
1491 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1492 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1496 local_free (bounce);
1502 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1504 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1506 if (read_bytes == byte_cnt)
1510 if (ferror (r->file))
1511 msg (ME, _("%s: Reading system file: %s."),
1512 handle_get_filename (r->fh), strerror (errno));
1513 else if (read_bytes != 0)
1514 msg (ME, _("%s: Partial record at end of system file."),
1515 handle_get_filename (r->fh));