1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
44 #include "debug-print.h"
46 /* System file reader. */
49 struct file_handle *fh; /* File handle. */
50 FILE *file; /* File stream. */
52 int reverse_endian; /* 1=file has endianness opposite us. */
53 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
54 int value_cnt; /* Number of `union values's per case. */
55 long case_cnt; /* Number of cases, -1 if unknown. */
56 int compressed; /* 1=compressed, 0=not compressed. */
57 double bias; /* Compression bias, usually 100.0. */
58 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 struct sfm_var *vars; /* Variables. */
62 size_t var_cnt; /* Number of variables. */
64 /* File's special constants. */
69 /* Decompression buffer. */
70 flt64 *buf; /* Buffer data. */
71 flt64 *ptr; /* Current location in buffer. */
72 flt64 *end; /* End of buffer data. */
74 /* Compression instruction octet. */
75 unsigned char x[8]; /* Current instruction octet. */
76 unsigned char *y; /* Location in current instruction octet. */
79 /* A variable in a system file. */
82 int width; /* 0=numeric, otherwise string width. */
83 int fv; /* Index into case. */
88 /* Swap bytes *A and *B. */
90 bswap (unsigned char *a, unsigned char *b)
97 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
99 bswap_int32 (int32 *x_)
101 unsigned char *x = (unsigned char *) x_;
102 bswap (x + 0, x + 3);
103 bswap (x + 1, x + 2);
106 /* Reverse the byte order of 64-bit floating point *X. */
108 bswap_flt64 (flt64 *x_)
110 unsigned char *x = (unsigned char *) x_;
111 bswap (x + 0, x + 7);
112 bswap (x + 1, x + 6);
113 bswap (x + 2, x + 5);
114 bswap (x + 3, x + 4);
118 corrupt_msg (int class, const char *format,...)
119 PRINTF_FORMAT (2, 3);
121 /* Displays a corrupt sysfile error. */
123 corrupt_msg (int class, const char *format,...)
129 getl_location (&e.where.filename, &e.where.line_number);
130 e.title = _("corrupt system file: ");
132 va_start (args, format);
133 err_vmsg (&e, format, args);
137 /* Closes a system file after we're done with it. */
139 sfm_close_reader (struct sfm_reader *r)
145 fh_close (r->fh, "system file", "rs");
148 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
149 msg (ME, _("%s: Closing system file: %s."),
150 handle_get_filename (r->fh), strerror (errno));
158 /* Dictionary reader. */
160 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
162 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
165 static int read_header (struct sfm_reader *,
166 struct dictionary *, struct sfm_read_info *);
167 static int parse_format_spec (struct sfm_reader *, int32,
168 struct fmt_spec *, struct variable *);
169 static int read_value_labels (struct sfm_reader *, struct dictionary *,
170 struct variable **var_by_idx);
171 static int read_variables (struct sfm_reader *,
172 struct dictionary *, struct variable ***var_by_idx);
173 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
174 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
175 static int read_documents (struct sfm_reader *, struct dictionary *);
177 static int fread_ok (struct sfm_reader *, void *, size_t);
179 /* Displays the message X with corrupt_msg, then jumps to the error
187 /* Calls buf_read with the specified arguments, and jumps to
188 error if the read fails. */
189 #define assertive_buf_read(a,b,c,d) \
191 if (!buf_read (a,b,c,d)) \
195 /* Opens the system file designated by file handle FH for
196 reading. Reads the system file's dictionary into *DICT.
197 If INFO is non-null, then it receives additional info about the
200 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
201 struct sfm_read_info *info)
203 struct sfm_reader *r = NULL;
204 struct variable **var_by_idx = NULL;
206 *dict = dict_create ();
207 if (!fh_open (fh, "system file", "rs"))
210 /* Create and initialize reader. */
211 r = xmalloc (sizeof *r);
213 r->file = fn_open (handle_get_filename (fh), "rb");
215 r->reverse_endian = 0;
226 r->sysmis = -FLT64_MAX;
227 r->highest = FLT64_MAX;
228 r->lowest = second_lowest_flt64;
230 r->buf = r->ptr = r->end = NULL;
231 r->y = r->x + sizeof r->x;
233 /* Check that file open succeeded. */
236 msg (ME, _("An error occurred while opening \"%s\" for reading "
237 "as a system file: %s."),
238 handle_get_filename (r->fh), strerror (errno));
243 /* Read header and variables. */
244 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
247 /* Handle weighting. */
248 if (r->weight_idx != -1)
250 struct variable *weight_var;
252 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
253 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
254 "and number of elements per case (%d)."),
255 handle_get_filename (r->fh), r->weight_idx, r->value_cnt));
258 weight_var = var_by_idx[r->weight_idx];
260 if (weight_var == NULL)
262 _("%s: Weighting variable may not be a continuation of "
263 "a long string variable."), handle_get_filename (fh)));
264 else if (weight_var->type == ALPHA)
265 lose ((ME, _("%s: Weighting variable may not be a string variable."),
266 handle_get_filename (fh)));
268 dict_set_weight (*dict, weight_var);
271 dict_set_weight (*dict, NULL);
273 /* Read records of types 3, 4, 6, and 7. */
278 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
279 if (r->reverse_endian)
280 bswap_int32 (&rec_type);
285 if (!read_value_labels (r, *dict, var_by_idx))
290 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
291 "records must always immediately follow type 3 "
293 handle_get_filename (r->fh)));
296 if (!read_documents (r, *dict))
313 assertive_buf_read (r, &data, sizeof data, 0);
314 if (r->reverse_endian)
316 bswap_int32 (&data.subtype);
317 bswap_int32 (&data.size);
318 bswap_int32 (&data.count);
320 bytes = data.size * data.count;
321 if (bytes < data.size || bytes < data.count)
322 lose ((ME, "%s: Record type %d subtype %d too large.",
323 handle_get_filename (r->fh), rec_type, data.subtype));
325 switch (data.subtype)
328 if (!read_machine_int32_info (r, data.size, data.count))
333 if (!read_machine_flt64_info (r, data.size, data.count))
338 case 6: /* ?? Used by SPSS 8.0. */
342 case 11: /* Variable display parameters */
344 const int n_vars = data.count / 3 ;
346 if ( data.count % 3 )
348 msg (MW, _("%s: Invalid subrecord length. "
349 "Record: 7; Subrecord: 11"),
350 handle_get_filename (r->fh));
354 for ( i = 0 ; i < n_vars ; ++i )
366 assertive_buf_read (r, &params, sizeof(params), 0); /* One variable's measure, width and alignment. */
368 v = dict_get_var(*dict, i);
370 v->measure = params.measure;
371 v->display_width = params.width;
372 v->alignment = params.align;
377 case 13: /* SPSS 12.0 Long variable name map */
379 char *buf, *short_name, *save_ptr;
383 buf = xmalloc (bytes + 1);
384 if (!buf_read (r, buf, bytes, 0))
392 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
394 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
396 char *long_name = strtok_r (NULL, "\t", &save_ptr);
399 /* Validate long name. */
400 if (long_name == NULL)
402 msg (MW, _("%s: Trailing garbage in long variable "
404 handle_get_filename (r->fh));
407 if (!var_is_valid_name (long_name, false))
409 msg (MW, _("%s: Long variable mapping to invalid "
410 "variable name `%s'."),
411 handle_get_filename (r->fh), long_name);
415 /* Find variable using short name. */
416 v = dict_lookup_var (*dict, short_name);
419 msg (MW, _("%s: Long variable mapping for "
420 "nonexistent variable %s."),
421 handle_get_filename (r->fh), short_name);
426 Renaming a variable may clear the short
427 name, but we want to retain it, so
428 re-set it explicitly. */
429 dict_rename_var (*dict, v, long_name);
430 var_set_short_name (v, short_name);
432 /* For compatibility, make sure dictionary
433 is in long variable name map order. In
434 the common case, this has no effect,
435 because the dictionary and the long
436 variable name map are already in the
438 dict_reorder_var (*dict, v, idx);
447 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
448 "encountered in system file."),
449 handle_get_filename (r->fh), data.subtype);
455 void *x = buf_read (r, NULL, data.size * data.count, 0);
467 assertive_buf_read (r, &filler, sizeof filler, 0);
472 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
473 handle_get_filename (r->fh), rec_type);
478 /* Come here on successful completion. */
483 /* Come here on unsuccessful completion. */
484 sfm_close_reader (r);
488 dict_destroy (*dict);
494 /* Read record type 7, subtype 3. */
496 read_machine_int32_info (struct sfm_reader *r, int size, int count)
/* Record type 7, subtype 3 must hold exactly eight int32 values.
   sizeof yields a size_t, so cast it to int to match the %d
   conversion in the message. */
503 if (size != sizeof (int32) || count != 8)
504 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
505 "subtype 3. Expected size %d, count 8."),
506 handle_get_filename (r->fh), size, count, (int) sizeof (int32)));
508 assertive_buf_read (r, data, sizeof data, 0);
509 if (r->reverse_endian)
510 for (i = 0; i < 8; i++)
511 bswap_int32 (&data[i]);
515 lose ((ME, _("%s: Floating-point representation in system file is not "
516 "IEEE-754. PSPP cannot convert between floating-point "
518 handle_get_filename (r->fh)));
520 #error Add support for your floating-point format.
523 #ifdef WORDS_BIGENDIAN
528 if (r->reverse_endian)
530 if (file_bigendian ^ (data[6] == 1))
531 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
532 "endianness intuited from file header (%s)."),
533 handle_get_filename (r->fh),
534 file_bigendian ? _("big-endian") : _("little-endian"),
535 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
538 /* PORTME: Character representation code. */
539 if (data[7] != 2 && data[7] != 3)
540 lose ((ME, _("%s: File-indicated character representation code (%s) is "
542 handle_get_filename (r->fh),
543 (data[7] == 1 ? "EBCDIC"
544 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
552 /* Read record type 7, subtype 4. */
554 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
/* Record type 7, subtype 4 must hold exactly three flt64 values, so
   the message reports an expected count of 3.  sizeof yields a
   size_t, so cast it to int to match the %d conversion. */
559 if (size != sizeof (flt64) || count != 3)
560 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
561 "subtype 4. Expected size %d, count 3."),
562 handle_get_filename (r->fh), size, count, (int) sizeof (flt64)));
564 assertive_buf_read (r, data, sizeof data, 0);
565 if (r->reverse_endian)
566 for (i = 0; i < 3; i++)
567 bswap_flt64 (&data[i]);
569 if (data[0] != SYSMIS || data[1] != FLT64_MAX
570 || data[2] != second_lowest_flt64)
573 r->highest = data[1];
575 msg (MW, _("%s: File-indicated value is different from internal value "
576 "for at least one of the three system values. SYSMIS: "
577 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
579 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
580 (double) data[1], (double) FLT64_MAX,
581 (double) data[2], (double) second_lowest_flt64);
591 read_header (struct sfm_reader *r,
592 struct dictionary *dict, struct sfm_read_info *info)
594 struct sysfile_header hdr; /* Disk buffer. */
595 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
596 int skip_amt = 0; /* Amount of product name to omit. */
599 /* Read header, check magic. */
600 assertive_buf_read (r, &hdr, sizeof hdr, 0);
601 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
602 lose ((ME, _("%s: Bad magic. Proper system files begin with "
603 "the four characters `$FL2'. This file will not be read."),
604 handle_get_filename (r->fh)));
606 /* Check eye-catcher string. */
607 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
608 for (i = 0; i < 60; i++)
609 if (!isprint ((unsigned char) prod_name[i]))
611 for (i = 59; i >= 0; i--)
612 if (!isgraph ((unsigned char) prod_name[i]))
617 prod_name[60] = '\0';
621 static const char *prefix[N_PREFIXES] =
623 "@(#) SPSS DATA FILE",
629 for (i = 0; i < N_PREFIXES; i++)
630 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
632 skip_amt = strlen (prefix[i]);
637 /* Check endianness. */
638 if (hdr.layout_code == 2)
639 r->reverse_endian = 0;
642 bswap_int32 (&hdr.layout_code);
643 if (hdr.layout_code != 2)
644 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
645 "should be 2, in big-endian or little-endian format."),
646 handle_get_filename (r->fh), hdr.layout_code));
648 r->reverse_endian = 1;
649 bswap_int32 (&hdr.case_size);
650 bswap_int32 (&hdr.compress);
651 bswap_int32 (&hdr.weight_idx);
652 bswap_int32 (&hdr.case_cnt);
653 bswap_flt64 (&hdr.bias);
657 /* Copy basic info and verify correctness. */
658 r->value_cnt = hdr.case_size;
660 /* If value count is ridiculous, then force it to -1 (a sentinel value). */
661 if ( r->value_cnt < 0 ||
662 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
665 r->compressed = hdr.compress;
667 r->weight_idx = hdr.weight_idx - 1;
669 r->case_cnt = hdr.case_cnt;
670 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
672 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
673 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
676 if (r->bias != 100.0)
677 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
679 handle_get_filename (r->fh), r->bias);
681 /* Make a file label only on the condition that the given label is
682 not all spaces or nulls. */
686 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
687 if (!isspace ((unsigned char) hdr.file_label[i])
688 && hdr.file_label[i] != 0)
690 char *label = xmalloc (i + 2);
691 memcpy (label, hdr.file_label, i + 1);
693 dict_set_label (dict, label);
703 memcpy (info->creation_date, hdr.creation_date, 9);
704 info->creation_date[9] = 0;
706 memcpy (info->creation_time, hdr.creation_time, 8);
707 info->creation_time[8] = 0;
709 #ifdef WORDS_BIGENDIAN
710 info->big_endian = !r->reverse_endian;
712 info->big_endian = r->reverse_endian;
715 info->compressed = hdr.compress;
717 info->case_cnt = hdr.case_cnt;
719 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
720 if (isgraph ((unsigned char) *cp))
722 strcpy (info->product, cp);
731 /* Reads most of the dictionary from R's file; also fills in the
732 associated VAR_BY_IDX array. */
734 read_variables (struct sfm_reader *r,
735 struct dictionary *dict, struct variable ***var_by_idx)
739 struct sysfile_variable sv; /* Disk buffer. */
740 int long_string_count = 0; /* # of long string continuation
741 records still expected. */
742 int next_value = 0; /* Index to next `value' structure. */
749 /* Pre-allocate variables. */
750 if ( r->value_cnt != -1 )
751 *var_by_idx = xmalloc(r->value_cnt * sizeof (**var_by_idx));
754 /* Read in the entry for each variable and use the info to
755 initialize the dictionary. */
763 if ( r->value_cnt != -1 && i >= r->value_cnt )
766 assertive_buf_read (r, &sv, sizeof sv, 0);
768 if (r->reverse_endian)
770 bswap_int32 (&sv.rec_type);
771 bswap_int32 (&sv.type);
772 bswap_int32 (&sv.has_var_label);
773 bswap_int32 (&sv.n_missing_values);
774 bswap_int32 (&sv.print);
775 bswap_int32 (&sv.write);
778 /* We've come to the end of the variable entries */
779 if (sv.rec_type != 2)
781 buf_unread(r, sizeof sv);
785 if ( -1 == r->value_cnt )
786 *var_by_idx = xrealloc (*var_by_idx, sizeof **var_by_idx * (i+1) );
788 /* If there was a long string previously, make sure that the
789 continuations are present; otherwise make sure there aren't
791 if (long_string_count)
794 lose ((ME, _("%s: position %d: String variable does not have "
795 "proper number of continuation records."),
796 handle_get_filename (r->fh), i));
798 (*var_by_idx)[i] = NULL;
802 else if (sv.type == -1)
803 lose ((ME, _("%s: position %d: Superfluous long string continuation "
805 handle_get_filename (r->fh), i));
807 /* Check fields for validity. */
808 if (sv.type < 0 || sv.type > 255)
809 lose ((ME, _("%s: position %d: Bad variable type code %d."),
810 handle_get_filename (r->fh), i, sv.type));
811 if (sv.has_var_label != 0 && sv.has_var_label != 1)
812 lose ((ME, _("%s: position %d: Variable label indicator field is not "
813 "0 or 1."), handle_get_filename (r->fh), i));
814 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
815 || sv.n_missing_values == -1)
816 lose ((ME, _("%s: position %d: Missing value indicator field is not "
817 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
819 /* Copy first character of variable name. */
820 if (!isalpha ((unsigned char) sv.name[0])
821 && sv.name[0] != '@' && sv.name[0] != '#')
822 lose ((ME, _("%s: position %d: Variable name begins with invalid "
824 handle_get_filename (r->fh), i));
825 if (islower ((unsigned char) sv.name[0]))
826 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
828 handle_get_filename (r->fh), i, sv.name[0]);
829 if (sv.name[0] == '#')
830 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
831 "(`#'). Scratch variables should not appear in system "
833 handle_get_filename (r->fh), i);
834 name[0] = toupper ((unsigned char) (sv.name[0]));
836 /* Copy remaining characters of variable name. */
837 for (j = 1; j < SHORT_NAME_LEN; j++)
839 int c = (unsigned char) sv.name[j];
843 else if (islower (c))
845 msg (MW, _("%s: position %d: Variable name character %d is "
846 "lowercase letter %c."),
847 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
848 name[j] = toupper ((unsigned char) (c));
850 else if (isalnum (c) || c == '.' || c == '@'
851 || c == '#' || c == '$' || c == '_')
854 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
856 handle_get_filename (r->fh), i, c, c));
860 /* Create variable. */
861 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
863 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
864 handle_get_filename (r->fh), name));
865 var_set_short_name (vv, vv->name);
867 /* Case reading data. */
868 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
869 long_string_count = nv - 1;
872 /* Get variable label, if any. */
873 if (sv.has_var_label == 1)
878 /* Read length of label. */
879 assertive_buf_read (r, &len, sizeof len, 0);
880 if (r->reverse_endian)
884 if (len < 0 || len > 255)
885 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
887 handle_get_filename (r->fh), vv->name, len));
891 /* Read label into variable structure. */
892 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
893 if (vv->label == NULL)
895 vv->label[len] = '\0';
899 /* Set missing values. */
900 if (sv.n_missing_values != 0)
904 if (vv->width > MAX_SHORT_STRING)
905 lose ((ME, _("%s: Long string variable %s may not have missing "
907 handle_get_filename (r->fh), vv->name));
909 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
911 if (r->reverse_endian && vv->type == NUMERIC)
912 for (j = 0; j < abs (sv.n_missing_values); j++)
913 bswap_flt64 (&mv[j]);
915 if (sv.n_missing_values > 0)
917 vv->miss_type = sv.n_missing_values;
918 if (vv->type == NUMERIC)
919 for (j = 0; j < sv.n_missing_values; j++)
920 vv->missing[j].f = mv[j];
922 for (j = 0; j < sv.n_missing_values; j++)
923 memcpy (vv->missing[j].s, &mv[j], vv->width);
929 if (vv->type == ALPHA)
930 lose ((ME, _("%s: String variable %s may not have missing "
931 "values specified as a range."),
932 handle_get_filename (r->fh), vv->name));
934 if (mv[0] == r->lowest)
936 vv->miss_type = MISSING_LOW;
937 vv->missing[x++].f = mv[1];
939 else if (mv[1] == r->highest)
941 vv->miss_type = MISSING_HIGH;
942 vv->missing[x++].f = mv[0];
946 vv->miss_type = MISSING_RANGE;
947 vv->missing[x++].f = mv[0];
948 vv->missing[x++].f = mv[1];
951 if (sv.n_missing_values == -3)
954 vv->missing[x++].f = mv[2];
959 vv->miss_type = MISSING_NONE;
961 if (!parse_format_spec (r, sv.print, &vv->print, vv)
962 || !parse_format_spec (r, sv.write, &vv->write, vv))
965 /* Add variable to list. */
966 if (var_cap >= r->var_cnt)
968 var_cap = 2 + r->var_cnt * 2;
969 r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars);
971 r->vars[r->var_cnt].width = vv->width;
972 r->vars[r->var_cnt].fv = vv->fv;
976 /* Some consistency checks. */
977 if (long_string_count != 0)
978 lose ((ME, _("%s: Long string continuation records omitted at end of "
980 handle_get_filename (r->fh)));
982 if (next_value != r->value_cnt)
983 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
984 "%d were read from file."),
985 handle_get_filename (r->fh), r->value_cnt, next_value);
993 /* Translates the format spec from sysfile format to internal
996 parse_format_spec (struct sfm_reader *r, int32 s,
997 struct fmt_spec *f, struct variable *v)
999 f->type = translate_fmt ((s >> 16) & 0xff);
1001 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1002 handle_get_filename (r->fh), (s >> 16) & 0xff));
1003 f->w = (s >> 8) & 0xff;
1006 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1007 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1008 handle_get_filename (r->fh),
1009 v->type == ALPHA ? _("String") : _("Numeric"),
1011 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1012 formats[f->type].name));
1014 if (!check_output_specifier (f, false)
1015 || !check_specifier_width (f, v->width, false))
1017 msg (ME, _("%s variable %s has invalid format specifier %s."),
1018 v->type == NUMERIC ? _("Numeric") : _("String"),
1019 v->name, fmt_to_string (f));
1020 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1028 /* Reads value labels from R's system file and inserts them into the
1029 associated dictionary. */
1031 read_value_labels (struct sfm_reader *r,
1032 struct dictionary *dict, struct variable **var_by_idx)
1036 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
1037 union value value; /* Value. */
1038 char *label; /* Null-terminated label string. */
1041 struct label *labels = NULL;
1042 int32 n_labels; /* Number of labels. */
1044 struct variable **var = NULL; /* Associated variables. */
1045 int32 n_vars; /* Number of associated variables. */
1049 /* First step: read the contents of the type 3 record and record its
1050 contents. Note that we can't do much with the data since we
1051 don't know yet whether it is of numeric or string type. */
1053 /* Read number of labels. */
1054 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1055 if (r->reverse_endian)
1056 bswap_int32 (&n_labels);
1058 /* Allocate memory. */
1059 labels = xmalloc (n_labels * sizeof *labels);
1060 for (i = 0; i < n_labels; i++)
1061 labels[i].label = NULL;
1063 /* Read each value/label tuple into labels[]. */
1064 for (i = 0; i < n_labels; i++)
1066 struct label *label = labels + i;
1067 unsigned char label_len;
1071 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1073 /* Read label length. */
1074 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1075 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1077 /* Read label, padding. */
1078 label->label = xmalloc (padded_len + 1);
1079 assertive_buf_read (r, label->label, padded_len - 1, 0);
1080 label->label[label_len] = 0;
1083 /* Second step: Read the type 4 record that has the list of
1084 variables to which the value labels are to be applied. */
1086 /* Read record type of type 4 record. */
1090 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1091 if (r->reverse_endian)
1092 bswap_int32 (&rec_type);
1095 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1096 "follow value label record (type 3) as it should."),
1097 handle_get_filename (r->fh)));
1100 /* Read number of variables associated with value label from type 4
1102 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1103 if (r->reverse_endian)
1104 bswap_int32 (&n_vars);
1105 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1106 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1107 "is not between 1 and the number of variables (%d)."),
1108 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1110 /* Read the list of variables. */
1111 var = xmalloc (n_vars * sizeof *var);
1112 for (i = 0; i < n_vars; i++)
1117 /* Read variable index, check range. */
1118 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1119 if (r->reverse_endian)
1120 bswap_int32 (&var_idx);
1121 if (var_idx < 1 || var_idx > r->value_cnt)
1122 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1123 "not between 1 and the number of values (%d)."),
1124 handle_get_filename (r->fh), var_idx, r->value_cnt));
1126 /* Make sure it's a real variable. */
1127 v = var_by_idx[var_idx - 1];
1129 lose ((ME, _("%s: Variable index associated with value label (%d) "
1130 "refers to a continuation of a string variable, not to "
1131 "an actual variable."),
1132 handle_get_filename (r->fh), var_idx));
1133 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1134 lose ((ME, _("%s: Value labels are not allowed on long string "
1136 handle_get_filename (r->fh), v->name));
1138 /* Add it to the list of variables. */
1142 /* Type check the variables. */
1143 for (i = 1; i < n_vars; i++)
1144 if (var[i]->type != var[0]->type)
1145 lose ((ME, _("%s: Variables associated with value label are not all of "
1146 "identical type. Variable %s has %s type, but variable "
1148 handle_get_filename (r->fh),
1149 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1150 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1152 /* Fill in labels[].value, now that we know the desired type. */
1153 for (i = 0; i < n_labels; i++)
1155 struct label *label = labels + i;
1157 if (var[0]->type == ALPHA)
/* Bound the copy by the destination (value.s), not by the size of
   the `label' pointer, which is unrelated to the string width. */
1159 const int copy_len = min (sizeof (label->raw_value),
1160 sizeof (label->value.s));
1161 memcpy (label->value.s, label->raw_value, copy_len);
1164 assert (sizeof f == sizeof label->raw_value);
1165 memcpy (&f, label->raw_value, sizeof f);
1166 if (r->reverse_endian)
1172 /* Assign the value_label's to each variable. */
1173 for (i = 0; i < n_vars; i++)
1175 struct variable *v = var[i];
1178 /* Add each label to the variable. */
1179 for (j = 0; j < n_labels; j++)
1181 struct label *label = labels + j;
1182 if (!val_labs_replace (v->val_labs, label->value, label->label))
1185 if (var[0]->type == NUMERIC)
1186 msg (MW, _("%s: File contains duplicate label for value %g for "
1188 handle_get_filename (r->fh), label->value.f, v->name);
1190 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1191 "for variable %s."),
1192 handle_get_filename (r->fh), v->width, label->value.s, v->name);
1196 for (i = 0; i < n_labels; i++)
1197 free (labels[i].label);
1205 for (i = 0; i < n_labels; i++)
1206 free (labels[i].label);
1213 /* Reads BYTE_CNT bytes from the file represented by R. If BUF is
1214 non-NULL, uses that as the buffer; otherwise allocates at least
1215 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1218 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1222 if (buf == NULL && byte_cnt > 0 )
1223 buf = xmalloc (max (byte_cnt, min_alloc));
1225 if ( byte_cnt == 0 )
1229 if (1 != fread (buf, byte_cnt, 1, r->file))
1231 if (ferror (r->file))
1232 msg (ME, _("%s: Reading system file: %s."),
1233 handle_get_filename (r->fh), strerror (errno));
1235 corrupt_msg (ME, _("%s: Unexpected end of file."),
1236 handle_get_filename (r->fh));
1242 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1244 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1246 assert(byte_cnt > 0);
1248 if ( 0 != fseek(r->file, -(long) byte_cnt, SEEK_CUR)) /* Convert to long before negating: byte_cnt is unsigned. */
1250 msg (ME, _("%s: Seeking system file: %s."),
1251 handle_get_filename (r->fh), strerror (errno));
1255 /* Reads a document record, type 6, from system file R, and sets up
1256 the documents and n_documents fields in the associated
1259 read_documents (struct sfm_reader *r, struct dictionary *dict)
1264 if (dict_get_documents (dict) != NULL)
1265 lose ((ME, _("%s: System file contains multiple "
1266 "type 6 (document) records."),
1267 handle_get_filename (r->fh)));
1269 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1271 lose ((ME, _("%s: Number of document lines (%ld) "
1272 "must be greater than 0."),
1273 handle_get_filename (r->fh), (long) line_cnt));
1275 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1276 /* FIXME? Run through asciify. */
1277 if (documents == NULL)
1279 documents[80 * line_cnt] = '\0';
1280 dict_set_documents (dict, documents);
1290 /* Reads compressed data into R->buf and sets other pointers
1291 appropriately. Returns nonzero only if both no errors occur and
1294 buffer_input (struct sfm_reader *r)
1299 r->buf = xmalloc (sizeof *r->buf * 128);
1300 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1301 if (ferror (r->file))
1303 msg (ME, _("%s: Error reading file: %s."),
1304 handle_get_filename (r->fh), strerror (errno));
1308 r->end = &r->buf[amt];
1312 /* Reads a single case consisting of compressed data from system
1313 file H into the array BUF[] according to reader R, and
1314 returns nonzero only if successful. */
1315 /* Data in system files is compressed in this manner. Data
1316 values are grouped into sets of eight ("octets"). Each value
1317 in an octet has one instruction byte; the eight are output together.
1318 Each instruction byte gives a value for that byte or indicates
1319 that the value can be found following the instructions. */
1321 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1323 const unsigned char *p_end = r->x + sizeof (flt64);
1324 unsigned char *p = r->y;
1326 const flt64 *buf_beg = buf;
1327 const flt64 *buf_end = &buf[r->value_cnt];
1331 for (; p < p_end; p++)
1335 /* Code 0 is ignored. */
1338 /* Code 252 is end of file. */
1340 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1341 "in partial case."),
1342 handle_get_filename (r->fh)));
1345 /* Code 253 indicates that the value is stored explicitly
1346 following the instruction bytes. */
1347 if (r->ptr == NULL || r->ptr >= r->end)
1348 if (!buffer_input (r))
1350 lose ((ME, _("%s: Unexpected end of file."),
1351 handle_get_filename (r->fh)));
1354 memcpy (buf++, r->ptr++, sizeof *buf);
1359 /* Code 254 indicates a string that is all blanks. */
1360 memset (buf++, ' ', sizeof *buf);
1365 /* Code 255 indicates the system-missing value. */
1367 if (r->reverse_endian)
1374 /* Codes 1 through 251 inclusive are taken to indicate a
1375 value of (BYTE - BIAS), where BYTE is the byte's value
1376 and BIAS is the compression bias (generally 100.0). */
1377 *buf = *p - r->bias;
1378 if (r->reverse_endian)
1386 /* We have reached the end of this instruction octet. Read
1388 if (r->ptr == NULL || r->ptr >= r->end)
1389 if (!buffer_input (r))
1392 lose ((ME, _("%s: Unexpected end of file."),
1393 handle_get_filename (r->fh)));
1396 memcpy (r->x, r->ptr++, sizeof *buf);
1404 /* We have filled up an entire record. Update state and return
1410 /* We have been unsuccessful at filling a record, either through i/o
1411 error or through an end-of-file indication. Update state and
1412 return unsuccessfully. */
1416 /* Reads one case from READER's file into C. Returns nonzero
1417 only if successful. */
1419 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1421 if (!r->compressed && sizeof (flt64) == sizeof (double))
1423 /* Fast path: external and internal representations are the
1424 same, except possibly for endianness or SYSMIS. Read
1425 directly into the case's buffer, then fix up any minor
1426 details as needed. */
1427 if (!fread_ok (r, case_data_all_rw (c),
1428 sizeof (union value) * r->value_cnt))
1431 /* Fix up endianness if needed. */
1432 if (r->reverse_endian)
1436 for (i = 0; i < r->var_cnt; i++)
1437 if (r->vars[i].width == 0)
1438 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1441 /* Fix up SYSMIS values if needed.
1442 I don't think this will ever actually kick in, but it
1444 if (r->sysmis != SYSMIS)
/* Test the same case slot that is rewritten: a variable's data sits
   at index r->vars[i].fv, not at the variable's ordinal I. */
1448 for (i = 0; i < r->var_cnt; i++)
1449 if (r->vars[i].width == 0 && case_num (c, r->vars[i].fv) == r->sysmis)
1450 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1455 /* Slow path: internal and external representations differ.
1456 Read into a bounce buffer, then copy to C. */
1463 bounce_size = sizeof *bounce * r->value_cnt;
1464 bounce = bounce_cur = local_alloc (bounce_size);
1467 read_ok = fread_ok (r, bounce, bounce_size);
1469 read_ok = read_compressed_data (r, bounce);
1472 local_free (bounce);
1476 for (i = 0; i < r->var_cnt; i++)
1478 struct sfm_var *v = &r->vars[i];
1482 flt64 f = *bounce_cur++;
1483 if (r->reverse_endian)
1485 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f; /* Store at the variable's case index, as the string branch does. */
1489 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1490 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1494 local_free (bounce);
1500 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1502 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1504 if (read_bytes == byte_cnt)
1508 if (ferror (r->file))
1509 msg (ME, _("%s: Reading system file: %s."),
1510 handle_get_filename (r->fh), strerror (errno));
1511 else if (read_bytes != 0)
1512 msg (ME, _("%s: Partial record at end of system file."),
1513 handle_get_filename (r->fh));