1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
44 #include "debug-print.h"
46 /* System file reader. */
/* State kept for one open system file.  sfm_open_reader allocates
   and fills it in; sfm_close_reader is handed the same pointer when
   reading is finished.
   NOTE(review): the line that opens this structure is not visible in
   this listing; the members below are as shown. */
49 struct file_handle *fh; /* File handle. */
50 FILE *file; /* File stream. */
52 int reverse_endian; /* 1=file has endianness opposite us. */
53 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
54 int value_cnt; /* Number of `union values's per case. */
55 long case_cnt; /* Number of cases, -1 if unknown. */
56 int compressed; /* 1=compressed, 0=not compressed. */
57 double bias; /* Compression bias, usually 100.0. */
58 int weight_idx; /* 0-based index of weighting variable, or -1. */
61 struct sfm_var *vars; /* Variables. */
62 size_t var_cnt; /* Number of variables. */
64 /* File's special constants. */
/* NOTE(review): the member declarations for this group are not
   visible here.  sfm_open_reader assigns r->sysmis, r->highest and
   r->lowest, so presumably those three are declared at this point. */
69 /* Decompression buffer. */
70 flt64 *buf; /* Buffer data. */
71 flt64 *ptr; /* Current location in buffer. */
72 flt64 *end; /* End of buffer data. */
74 /* Compression instruction octet. */
/* sfm_open_reader sets y to x + sizeof x, i.e. one past the last
   instruction byte, so the first compressed read starts with an
   exhausted octet. */
75 unsigned char x[8]; /* Current instruction octet. */
76 unsigned char *y; /* Location in current instruction octet. */
79 /* A variable in a system file. */
/* read_variables appends one of these per dictionary variable,
   copying width and fv from the newly created variable;
   sfm_read_case walks the array to place each value in the case.
   NOTE(review): the structure's opening line is not visible in this
   listing. */
82 int width; /* 0=numeric, otherwise string width. */
83 int fv; /* Index into case. */
88 /* Swap bytes *A and *B. */
/* Helper for bswap_int32 and bswap_flt64, which call it on mirrored
   byte positions to reverse a value in place.
   NOTE(review): only the parameter list is visible in this listing;
   the return type and body lines are not shown. */
90 bswap (unsigned char *a, unsigned char *b)
97 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
/* Works in place on the object representation: X_ is viewed as an
   array of unsigned char and byte 0 is exchanged with byte 3, byte 1
   with byte 2.  The fixed offsets assume int32 is exactly four bytes
   wide -- TODO confirm against the int32 typedef. */
99 bswap_int32 (int32 *x_)
101 unsigned char *x = (unsigned char *) x_;
102 bswap (x + 0, x + 3);
103 bswap (x + 1, x + 2);
106 /* Reverse the byte order of 64-bit floating point *X. */
/* Works in place on the object representation: X_ is viewed as an
   array of unsigned char and the four outer/inner byte pairs
   (0,7), (1,6), (2,5), (3,4) are exchanged.  The fixed offsets
   assume flt64 is exactly eight bytes wide -- TODO confirm against
   the flt64 typedef. */
108 bswap_flt64 (flt64 *x_)
110 unsigned char *x = (unsigned char *) x_;
111 bswap (x + 0, x + 7);
112 bswap (x + 1, x + 6);
113 bswap (x + 2, x + 5);
114 bswap (x + 3, x + 4);
/* Prototype.  PRINTF_FORMAT (2, 3) marks argument 2 as a
   printf-style format whose variadic arguments start at argument 3,
   so the compiler can check calls. */
118 corrupt_msg (int class, const char *format,...)
119 PRINTF_FORMAT (2, 3);
121 /* Displays a corrupt sysfile error. */
/* Fills in the message's source location with getl_location, sets
   its title to the "corrupt system file: " prefix, and forwards
   FORMAT and the variadic arguments to err_vmsg.
   NOTE(review): the lines that declare `e' and `args', that use
   CLASS, and that end the va_list are not visible in this listing. */
123 corrupt_msg (int class, const char *format,...)
129 getl_location (&e.where.filename, &e.where.line_number);
130 e.title = _("corrupt system file: ");
132 va_start (args, format);
133 err_vmsg (&e, format, args);
137 /* Closes a system file after we're done with it. */
/* Releases the file handle with fh_close using the same
   "system file"/"rs" arguments that sfm_open_reader passed to
   fh_open, then closes the stream with fn_close.  A failing close is
   reported through msg with strerror (errno) but is not otherwise
   acted on in the visible lines.
   NOTE(review): sfm_open_reader's error path calls this function
   with whatever R it has at that point; any guards for a null R or a
   null R->file, and the freeing of R's buffers, are on lines not
   visible in this listing. */
139 sfm_close_reader (struct sfm_reader *r)
145 fh_close (r->fh, "system file", "rs");
148 if (fn_close (handle_get_filename (r->fh), r->file) == EOF)
149 msg (ME, _("%s: Closing system file: %s."),
150 handle_get_filename (r->fh), strerror (errno));
158 /* Dictionary reader. */
/* Forward declarations for the file-local helpers used while reading
   the dictionary, followed by the two error-handling macros.  The
   helpers declared with an int return type are tested with `!' by
   sfm_open_reader, i.e. zero means failure.
   NOTE(review): the continuation line of the buf_read prototype and
   the body of the first macro are not visible in this listing. */
160 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
163 static int read_header (struct sfm_reader *,
164 struct dictionary *, struct sfm_read_info *);
165 static int parse_format_spec (struct sfm_reader *, int32,
166 struct fmt_spec *, struct variable *);
167 static int read_value_labels (struct sfm_reader *, struct dictionary *,
168 struct variable **var_by_idx);
169 static int read_variables (struct sfm_reader *,
170 struct dictionary *, struct variable ***var_by_idx);
171 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
172 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
173 static int read_documents (struct sfm_reader *, struct dictionary *);
175 static int fread_ok (struct sfm_reader *, void *, size_t);
/* Both macros below transfer control to an error label, so they can
   only be used inside functions that define one. */
177 /* Displays the message X with corrupt_msg, then jumps to the error
185 /* Calls buf_read with the specified arguments, and jumps to
186 error if the read fails. */
187 #define assertive_buf_read(a,b,c,d) \
189 if (!buf_read (a,b,c,d)) \
/* Overview of the visible control flow:
   1. A fresh dictionary is created in *DICT before FH is opened.
   2. The reader is allocated, the file is opened in "rb" mode, the
      reader's fields are given defaults, and only then is the result
      of the open checked.
   3. read_header and read_variables are run; the weighting variable
      named by the header is validated and installed.
   4. Records are then read one at a time and dispatched on their
      type (3/4 value labels, 6 documents, 7 extension records).
   5. On failure the reader is closed and the dictionary destroyed.
   NOTE(review): many lines of this function (declarations, braces,
   case labels, returns) are not visible in this listing. */
193 /* Opens the system file designated by file handle FH for
194 reading. Reads the system file's dictionary into *DICT.
195 If INFO is non-null, then it receives additional info about the
198 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
199 struct sfm_read_info *info)
201 struct sfm_reader *r = NULL;
202 struct variable **var_by_idx = NULL;
204 *dict = dict_create ();
205 if (!fh_open (fh, "system file", "rs"))
208 /* Create and initialize reader. */
209 r = xmalloc (sizeof *r);
211 r->file = fn_open (handle_get_filename (fh), "rb");
213 r->reverse_endian = 0;
/* Defaults for the file's special values; read_machine_flt64_info
   may replace them when a type 7, subtype 4 record disagrees. */
224 r->sysmis = -FLT64_MAX;
225 r->highest = FLT64_MAX;
226 r->lowest = second_lowest_flt64;
228 r->buf = r->ptr = r->end = NULL;
/* Point y one past the end of x so the first compressed read sees an
   exhausted instruction octet. */
229 r->y = r->x + sizeof r->x;
231 /* Check that file open succeeded. */
234 msg (ME, _("An error occurred while opening \"%s\" for reading "
235 "as a system file: %s."),
236 handle_get_filename (r->fh), strerror (errno));
241 /* Read header and variables. */
242 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
245 /* Handle weighting. */
/* read_header stores the header's 1-based index minus one, so -1
   means "no weighting variable".  A NULL entry in var_by_idx marks a
   long string continuation position. */
246 if (r->weight_idx != -1)
248 struct variable *weight_var = var_by_idx[r->weight_idx];
250 if (weight_var == NULL)
252 _("%s: Weighting variable may not be a continuation of "
253 "a long string variable."), handle_get_filename (fh)));
254 else if (weight_var->type == ALPHA)
255 lose ((ME, _("%s: Weighting variable may not be a string variable."),
256 handle_get_filename (fh)));
258 dict_set_weight (*dict, weight_var);
261 dict_set_weight (*dict, NULL);
263 /* Read records of types 3, 4, 6, and 7. */
268 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
269 if (r->reverse_endian)
270 bswap_int32 (&rec_type);
275 if (!read_value_labels (r, *dict, var_by_idx))
280 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
281 "records must always immediately follow type 3 "
283 handle_get_filename (r->fh)));
286 if (!read_documents (r, *dict))
/* Type 7 record: a three-field sub-header (subtype, size, count)
   followed by size * count bytes of payload. */
302 assertive_buf_read (r, &data, sizeof data, 0);
303 if (r->reverse_endian)
305 bswap_int32 (&data.subtype);
306 bswap_int32 (&data.size);
307 bswap_int32 (&data.count);
310 switch (data.subtype)
313 if (!read_machine_int32_info (r, data.size, data.count))
318 if (!read_machine_flt64_info (r, data.size, data.count))
323 case 6: /* ?? Used by SPSS 8.0. */
/* Subtype 11 carries three values per variable (measure, width,
   alignment).
   NOTE(review): in the visible lines n_vars comes straight from the
   file and is used as the bound for dict_get_var (*dict, i); no
   check against the dictionary's variable count is shown, and
   data.size is not examined. */
327 case 11: /* Variable display parameters */
329 const int n_vars = data.count / 3 ;
331 if ( data.count % 3 )
333 msg (MW, _("%s: Invalid subrecord length. "
334 "Record: 7; Subrecord: 11"),
335 handle_get_filename (r->fh));
339 for ( i = 0 ; i < n_vars ; ++i )
/* NOTE(review): `¶ms' in the call below looks like `&params' with
   its first characters mis-encoded; as written it is not a valid
   identifier.  Left untouched here -- confirm against the original
   file and repair. */
351 assertive_buf_read (r, ¶ms, sizeof(params), 0);
353 v = dict_get_var(*dict, i);
355 v->measure = params.measure;
356 v->display_width = params.width;
357 v->alignment = params.align;
/* Subtype 13 payload is text: tab-separated SHORT=LONG pairs.
   NOTE(review): data.size * data.count is computed from two
   file-supplied int32 values with no visible range check, so a
   negative or overflowing product would reach xmalloc and the
   terminator store.  The release of `buf' is not visible in this
   listing. */
362 case 13: /* SPSS 12.0 Long variable name map */
366 char *buf = xmalloc(data.size * data.count + 1);
368 assertive_buf_read (r, buf, data.size * data.count, 0);
369 buf[data.size * data.count]='\0';
371 s = strtok_r(buf, "\t", &tbuf);
374 char *shortname, *longname;
375 shortname = strsep(&s,"=");
376 longname = strsep(&s,"=");
378 dict_add_longvar_entry(*dict, shortname, longname);
380 s = strtok_r(0,"\t", &tbuf);
388 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
389 "encountered in system file."),
390 handle_get_filename (r->fh), data.subtype);
/* Passing a NULL buffer makes buf_read allocate one for the payload;
   what is then done with `x' is on lines not visible here. */
396 void *x = buf_read (r, NULL, data.size * data.count, 0);
408 assertive_buf_read (r, &filler, sizeof filler, 0);
413 lose ((ME, _("%s: Unrecognized record type %d."),
414 handle_get_filename (r->fh), rec_type));
419 /* Come here on successful completion. */
424 /* Come here on unsuccessful completion. */
425 sfm_close_reader (r);
429 dict_destroy (*dict);
435 /* Read record type 7, subtype 3. */
/* Expects exactly eight int32 values (SIZE == sizeof (int32),
   COUNT == 8), byte-swaps them when the file's endianness differs,
   and then validates three things in the visible lines: the
   floating-point representation, the endianness code in data[6]
   (1 = big-endian, 2 = little-endian) against what the header
   implied, and the character code in data[7] (2 and 3 are accepted;
   1 is reported as EBCDIC, 4 as DEC Kanji).
   NOTE(review): the first error message formats sizeof (int32), a
   size_t, with %d; on platforms where size_t is wider than int that
   is a format/argument mismatch -- cast to int or use a size_t
   conversion. */
437 read_machine_int32_info (struct sfm_reader *r, int size, int count)
444 if (size != sizeof (int32) || count != 8)
445 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
446 "subtype 3. Expected size %d, count 8."),
447 handle_get_filename (r->fh), size, count, sizeof (int32)));
449 assertive_buf_read (r, data, sizeof data, 0);
450 if (r->reverse_endian)
451 for (i = 0; i < 8; i++)
452 bswap_int32 (&data[i]);
456 lose ((ME, _("%s: Floating-point representation in system file is not "
457 "IEEE-754. PSPP cannot convert between floating-point "
459 handle_get_filename (r->fh)));
461 #error Add support for your floating-point format.
464 #ifdef WORDS_BIGENDIAN
469 if (r->reverse_endian)
471 if (file_bigendian ^ (data[6] == 1))
472 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
473 "endianness intuited from file header (%s)."),
474 handle_get_filename (r->fh),
475 file_bigendian ? _("big-endian") : _("little-endian"),
476 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
479 /* PORTME: Character representation code. */
480 if (data[7] != 2 && data[7] != 3)
481 lose ((ME, _("%s: File-indicated character representation code (%s) is "
483 handle_get_filename (r->fh),
484 (data[7] == 1 ? "EBCDIC"
485 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
493 /* Read record type 7, subtype 4. */
/* Expects exactly three flt64 values (SIZE == sizeof (flt64),
   COUNT == 3): the file's SYSMIS, HIGHEST and LOWEST, in that order.
   They are byte-swapped when needed and compared with this build's
   SYSMIS, FLT64_MAX and second_lowest_flt64; on any difference the
   reader's copies are updated (the store to r->highest is visible)
   and a warning listing indicated and expected values is issued.
   NOTE(review): the size/count error message says "count 8" although
   the check on the line above it requires a count of 3; the text
   looks copied from the subtype 3 reader.
   NOTE(review): the same message formats sizeof (flt64), a size_t,
   with %d. */
495 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
500 if (size != sizeof (flt64) || count != 3)
501 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
502 "subtype 4. Expected size %d, count 8."),
503 handle_get_filename (r->fh), size, count, sizeof (flt64)));
505 assertive_buf_read (r, data, sizeof data, 0);
506 if (r->reverse_endian)
507 for (i = 0; i < 3; i++)
508 bswap_flt64 (&data[i]);
510 if (data[0] != SYSMIS || data[1] != FLT64_MAX
511 || data[2] != second_lowest_flt64)
514 r->highest = data[1];
516 msg (MW, _("%s: File-indicated value is different from internal value "
517 "for at least one of the three system values. SYSMIS: "
518 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
520 handle_get_filename (r->fh), (double) data[0], (double) SYSMIS,
521 (double) data[1], (double) FLT64_MAX,
522 (double) data[2], (double) second_lowest_flt64);
/* Reads the fixed-size file header into a local sysfile_header,
   validates it, copies the basic parameters into R, sets the
   dictionary's file label, and, in the visible lines, fills in INFO
   (creation date/time, endianness, compression flag, case count,
   product name).
   NOTE(review): several lines of this function (declarations of `i'
   and `cp', braces, the test on INFO, returns) are not visible in
   this listing. */
532 read_header (struct sfm_reader *r,
533 struct dictionary *dict, struct sfm_read_info *info)
535 struct sysfile_header hdr; /* Disk buffer. */
536 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
537 int skip_amt = 0; /* Amount of product name to omit. */
540 /* Read header, check magic. */
541 assertive_buf_read (r, &hdr, sizeof hdr, 0);
542 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
543 lose ((ME, _("%s: Bad magic. Proper system files begin with "
544 "the four characters `$FL2'. This file will not be read."),
545 handle_get_filename (r->fh)));
547 /* Check eye-catcher string. */
/* NOTE(review): the loops and the terminator store below use the
   literals 60 and 59 while the buffer is sized from
   sizeof hdr.prod_name; they agree only if that member is 60 bytes
   -- TODO confirm against struct sysfile_header. */
548 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
549 for (i = 0; i < 60; i++)
550 if (!isprint ((unsigned char) prod_name[i]))
552 for (i = 59; i >= 0; i--)
553 if (!isgraph ((unsigned char) prod_name[i]))
558 prod_name[60] = '\0';
562 static const char *prefix[N_PREFIXES] =
564 "@(#) SPSS DATA FILE",
570 for (i = 0; i < N_PREFIXES; i++)
571 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
573 skip_amt = strlen (prefix[i]);
578 /* Check endianness. */
/* layout_code must read as 2 either directly or after a byte swap;
   in the latter case the file has the opposite endianness and the
   remaining multi-byte header fields are swapped too. */
579 if (hdr.layout_code == 2)
580 r->reverse_endian = 0;
583 bswap_int32 (&hdr.layout_code);
584 if (hdr.layout_code != 2)
585 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
586 "should be 2, in big-endian or little-endian format."),
587 handle_get_filename (r->fh), hdr.layout_code));
589 r->reverse_endian = 1;
590 bswap_int32 (&hdr.case_size);
591 bswap_int32 (&hdr.compress);
592 bswap_int32 (&hdr.weight_idx);
593 bswap_int32 (&hdr.case_cnt);
594 bswap_flt64 (&hdr.bias);
597 /* Copy basic info and verify correctness. */
598 r->value_cnt = hdr.case_size;
599 if (r->value_cnt <= 0
600 || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
601 lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
603 handle_get_filename (r->fh), r->value_cnt,
604 INT_MAX / sizeof (union value) / 2));
606 r->compressed = hdr.compress;
/* The header index is 1-based with 0 meaning "none", so the stored
   value is 0-based with -1 meaning "none".  The range check that
   follows is made on the header's own value. */
608 r->weight_idx = hdr.weight_idx - 1;
609 if (hdr.weight_idx < 0 || hdr.weight_idx > r->value_cnt)
610 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
611 "and number of elements per case (%d)."),
612 handle_get_filename (r->fh), hdr.weight_idx, r->value_cnt));
614 r->case_cnt = hdr.case_cnt;
615 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
617 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
618 handle_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
/* An unusual bias is only warned about, not rejected. */
621 if (r->bias != 100.0)
622 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
624 handle_get_filename (r->fh), r->bias);
626 /* Make a file label only on the condition that the given label is
627 not all spaces or nulls. */
/* Scans backward for the last byte that is neither whitespace nor
   NUL; the label is the prefix ending at that byte, copied into a
   buffer with room for one extra byte. */
631 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
632 if (!isspace ((unsigned char) hdr.file_label[i])
633 && hdr.file_label[i] != 0)
635 char *label = xmalloc (i + 2);
636 memcpy (label, hdr.file_label, i + 1);
638 dict_set_label (dict, label);
/* NOTE(review): the stores below need info->creation_date to hold at
   least 10 bytes and info->creation_time at least 9 -- TODO confirm
   against struct sfm_read_info. */
648 memcpy (info->creation_date, hdr.creation_date, 9);
649 info->creation_date[9] = 0;
651 memcpy (info->creation_time, hdr.creation_time, 8);
652 info->creation_time[8] = 0;
654 #ifdef WORDS_BIGENDIAN
655 info->big_endian = !r->reverse_endian;
657 info->big_endian = r->reverse_endian;
660 info->compressed = hdr.compress;
662 info->case_cnt = hdr.case_cnt;
/* Skips the recognized prefix and any leading non-graphic bytes,
   then copies the rest of the product name.
   NOTE(review): strcpy is unbounded; the source can be up to 60
   characters plus the terminator, so info->product must be at least
   61 bytes -- TODO confirm. */
664 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
665 if (isgraph ((unsigned char) *cp))
667 strcpy (info->product, cp);
676 /* Reads most of the dictionary from file H; also fills in the
677 associated VAR_BY_IDX array. */
/* One type 2 record is read for each of the r->value_cnt value
   positions.  A position that begins a variable gets a dictionary
   variable and a non-null (*var_by_idx)[i]; a position that
   continues a long string gets NULL.  For each real variable the
   visible lines validate the record, normalize the short name to
   upper case, read the optional label and missing values, translate
   the print and write formats, and append an entry to r->vars.
   The header comment says "file H"; the reader parameter here is R.
   NOTE(review): many lines of this function (declarations, braces,
   some statements, returns) are not visible in this listing. */
679 read_variables (struct sfm_reader *r,
680 struct dictionary *dict, struct variable ***var_by_idx)
684 struct sysfile_variable sv; /* Disk buffer. */
685 int long_string_count = 0; /* # of long string continuation
686 records still expected. */
687 int next_value = 0; /* Index to next `value' structure. */
692 /* Allocate variables. */
693 *var_by_idx = xmalloc (sizeof **var_by_idx * r->value_cnt);
695 /* Read in the entry for each variable and use the info to
696 initialize the dictionary. */
697 for (i = 0; i < r->value_cnt; i++)
704 assertive_buf_read (r, &sv, sizeof sv, 0);
706 if (r->reverse_endian)
708 bswap_int32 (&sv.rec_type);
709 bswap_int32 (&sv.type);
710 bswap_int32 (&sv.has_var_label);
711 bswap_int32 (&sv.n_missing_values);
712 bswap_int32 (&sv.print);
713 bswap_int32 (&sv.write);
716 if (sv.rec_type != 2)
717 lose ((ME, _("%s: position %d: Bad record type (%d); "
718 "the expected value was 2."),
719 handle_get_filename (r->fh), i, sv.rec_type));
721 /* If there was a long string previously, make sure that the
722 continuations are present; otherwise make sure there aren't
724 if (long_string_count)
727 lose ((ME, _("%s: position %d: String variable does not have "
728 "proper number of continuation records."),
729 handle_get_filename (r->fh), i));
731 (*var_by_idx)[i] = NULL;
735 else if (sv.type == -1)
736 lose ((ME, _("%s: position %d: Superfluous long string continuation "
738 handle_get_filename (r->fh), i));
740 /* Check fields for validity. */
/* A type of 0 is numeric and 1 through 255 is a string width, as the
   later computation of nv shows; a type of -1 was handled above. */
741 if (sv.type < 0 || sv.type > 255)
742 lose ((ME, _("%s: position %d: Bad variable type code %d."),
743 handle_get_filename (r->fh), i, sv.type));
744 if (sv.has_var_label != 0 && sv.has_var_label != 1)
745 lose ((ME, _("%s: position %d: Variable label indicator field is not "
746 "0 or 1."), handle_get_filename (r->fh), i));
747 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
748 || sv.n_missing_values == -1)
749 lose ((ME, _("%s: position %d: Missing value indicator field is not "
750 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (r->fh), i));
752 /* Copy first character of variable name. */
/* The first character must be a letter, `@' or `#'.  Lower case and
   a leading `#' are accepted with a warning; the stored name is
   upper-cased. */
753 if (!isalpha ((unsigned char) sv.name[0])
754 && sv.name[0] != '@' && sv.name[0] != '#')
755 lose ((ME, _("%s: position %d: Variable name begins with invalid "
757 handle_get_filename (r->fh), i));
758 if (islower ((unsigned char) sv.name[0]))
759 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
761 handle_get_filename (r->fh), i, sv.name[0]);
762 if (sv.name[0] == '#')
763 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
764 "(`#'). Scratch variables should not appear in system "
766 handle_get_filename (r->fh), i);
767 name[0] = toupper ((unsigned char) (sv.name[0]));
769 /* Copy remaining characters of variable name. */
770 for (j = 1; j < SHORT_NAME_LEN; j++)
772 int c = (unsigned char) sv.name[j];
776 else if (islower (c))
778 msg (MW, _("%s: position %d: Variable name character %d is "
779 "lowercase letter %c."),
780 handle_get_filename (r->fh), i, j + 1, sv.name[j]);
781 name[j] = toupper ((unsigned char) (c));
783 else if (isalnum (c) || c == '.' || c == '@'
784 || c == '#' || c == '$' || c == '_')
787 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
789 handle_get_filename (r->fh), i, c, c));
793 /* Create variable. */
794 vv = (*var_by_idx)[i] = dict_create_var_from_short (dict, name, sv.type);
796 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
797 handle_get_filename (r->fh), name));
799 /* Case reading data. */
/* nv is the number of flt64-sized value positions the variable
   occupies: one for a numeric, the width rounded up to whole flt64
   units for a string.  Every position after the first must arrive as
   a continuation record. */
800 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
801 long_string_count = nv - 1;
804 /* Get variable label, if any. */
805 if (sv.has_var_label == 1)
810 /* Read length of label. */
811 assertive_buf_read (r, &len, sizeof len, 0);
812 if (r->reverse_endian)
816 if (len < 0 || len > 255)
817 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
819 handle_get_filename (r->fh), vv->name, len));
823 /* Read label into variable structure. */
/* The label is stored padded to a multiple of sizeof (int32).
   buf_read is asked for at least len + 1 bytes, so the terminator
   store at index len stays inside the allocation. */
824 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
825 if (vv->label == NULL)
827 vv->label[len] = '\0';
831 /* Set missing values. */
/* A positive count means that many discrete values.  -2 means a
   range and -3 a range plus one discrete value; ranges are numeric
   only.  A range whose low end equals the file's LOWEST, or whose
   high end equals HIGHEST, is stored as an open-ended range. */
832 if (sv.n_missing_values != 0)
836 if (vv->width > MAX_SHORT_STRING)
837 lose ((ME, _("%s: Long string variable %s may not have missing "
839 handle_get_filename (r->fh), vv->name));
841 assertive_buf_read (r, mv, sizeof *mv * abs (sv.n_missing_values), 0);
843 if (r->reverse_endian && vv->type == NUMERIC)
844 for (j = 0; j < abs (sv.n_missing_values); j++)
845 bswap_flt64 (&mv[j]);
847 if (sv.n_missing_values > 0)
849 vv->miss_type = sv.n_missing_values;
850 if (vv->type == NUMERIC)
851 for (j = 0; j < sv.n_missing_values; j++)
852 vv->missing[j].f = mv[j];
854 for (j = 0; j < sv.n_missing_values; j++)
855 memcpy (vv->missing[j].s, &mv[j], vv->width);
861 if (vv->type == ALPHA)
862 lose ((ME, _("%s: String variable %s may not have missing "
863 "values specified as a range."),
864 handle_get_filename (r->fh), vv->name));
866 if (mv[0] == r->lowest)
868 vv->miss_type = MISSING_LOW;
869 vv->missing[x++].f = mv[1];
871 else if (mv[1] == r->highest)
873 vv->miss_type = MISSING_HIGH;
874 vv->missing[x++].f = mv[0];
878 vv->miss_type = MISSING_RANGE;
879 vv->missing[x++].f = mv[0];
880 vv->missing[x++].f = mv[1];
883 if (sv.n_missing_values == -3)
886 vv->missing[x++].f = mv[2];
891 vv->miss_type = MISSING_NONE;
893 if (!parse_format_spec (r, sv.print, &vv->print, vv)
894 || !parse_format_spec (r, sv.write, &vv->write, vv))
897 /* Add variable to list. */
/* NOTE(review): the growth test reads `var_cap >= r->var_cnt'.  A
   capacity check is normally the other way round (count has reached
   capacity).  Because the new capacity is 2 + 2 * var_cnt, the test
   as written stays true on every pass, so the array is reallocated
   for every variable: safe, but presumably not what was intended.
   The initial value of var_cap and the increment of r->var_cnt are
   on lines not visible here -- confirm before changing. */
898 if (var_cap >= r->var_cnt)
900 var_cap = 2 + r->var_cnt * 2;
901 r->vars = xrealloc (r->vars, var_cap * sizeof *r->vars);
903 r->vars[r->var_cnt].width = vv->width;
904 r->vars[r->var_cnt].fv = vv->fv;
908 /* Some consistency checks. */
909 if (long_string_count != 0)
910 lose ((ME, _("%s: Long string continuation records omitted at end of "
912 handle_get_filename (r->fh)));
913 if (next_value != r->value_cnt)
914 lose ((ME, _("%s: System file header indicates %d variable positions but "
915 "%d were read from file."),
916 handle_get_filename (r->fh), r->value_cnt, next_value));
/* Unpacks the 32-bit on-disk format word S into *V and checks it
   against variable VV.  In the visible lines bits 16-23 select the
   format type (mapped through translate_fmt) and bits 8-15 give the
   width.  The result is rejected when a string variable gets a
   numeric format or the reverse.
   NOTE(review): the extraction of the remaining field of S, the
   conditions guarding the two "Bad format specifier byte" errors,
   and the return statements are not visible in this listing.  Both
   of those errors print the same text and the same byte. */
924 /* Translates the format spec from sysfile format to internal
927 parse_format_spec (struct sfm_reader *r, int32 s, struct fmt_spec *v, struct variable *vv)
929 v->type = translate_fmt ((s >> 16) & 0xff);
931 lose ((ME, _("%s: Bad format specifier byte (%d)."),
932 handle_get_filename (r->fh), (s >> 16) & 0xff));
933 v->w = (s >> 8) & 0xff;
936 /* FIXME? Should verify the resulting specifier more thoroughly. */
939 lose ((ME, _("%s: Bad format specifier byte (%d)."),
940 handle_get_filename (r->fh), (s >> 16) & 0xff));
941 if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
942 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
943 handle_get_filename (r->fh),
944 vv->type == ALPHA ? _("String") : _("Numeric"),
946 formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
947 formats[v->type].name));
954 /* Reads value labels from sysfile H and inserts them into the
955 associated dictionary. */
/* Handles a type 3 record together with the type 4 record that must
   follow it.  The labels are first read as raw 8-byte values plus
   text; then the list of target variables is read and validated;
   only then are the raw values interpreted as string or numeric, and
   each label is attached to each listed variable.  The labels array
   is freed on both the success and the failure path.
   The header comment says "sysfile H"; the reader parameter here is R.
   NOTE(review): many lines of this function (the struct label
   wrapper, declarations, braces, returns) are not visible in this
   listing. */
957 read_value_labels (struct sfm_reader *r,
958 struct dictionary *dict, struct variable **var_by_idx)
962 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
963 union value value; /* Value. */
964 char *label; /* Null-terminated label string. */
967 struct label *labels = NULL;
968 int32 n_labels; /* Number of labels. */
970 struct variable **var = NULL; /* Associated variables. */
971 int32 n_vars; /* Number of associated variables. */
975 /* First step: read the contents of the type 3 record and record its
976 contents. Note that we can't do much with the data since we
977 don't know yet whether it is of numeric or string type. */
979 /* Read number of labels. */
980 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
981 if (r->reverse_endian)
982 bswap_int32 (&n_labels);
984 /* Allocate memory. */
/* NOTE(review): n_labels comes straight from the file and no range
   check is visible before it sizes this allocation; a negative or
   very large value would be converted and multiplied here.  The
   label pointers are nulled first so the cleanup loops can free all
   n_labels entries even after a partial read. */
985 labels = xmalloc (n_labels * sizeof *labels);
986 for (i = 0; i < n_labels; i++)
987 labels[i].label = NULL;
989 /* Read each value/label tuple into labels[]. */
990 for (i = 0; i < n_labels; i++)
992 struct label *label = labels + i;
993 unsigned char label_len;
997 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
999 /* Read label length. */
/* The length byte and the text together are padded to a multiple of
   sizeof (flt64); the byte has already been consumed, hence the
   read of padded_len - 1 bytes below. */
1000 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1001 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1003 /* Read label, padding. */
1004 label->label = xmalloc (padded_len + 1);
1005 assertive_buf_read (r, label->label, padded_len - 1, 0);
1006 label->label[label_len] = 0;
1009 /* Second step: Read the type 4 record that has the list of
1010 variables to which the value labels are to be applied. */
1012 /* Read record type of type 4 record. */
1016 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1017 if (r->reverse_endian)
1018 bswap_int32 (&rec_type);
1021 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1022 "follow value label record (type 3) as it should."),
1023 handle_get_filename (r->fh)));
1026 /* Read number of variables associated with value label from type 4
1028 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1029 if (r->reverse_endian)
1030 bswap_int32 (&n_vars);
1031 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1032 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1033 "is not between 1 and the number of variables (%d)."),
1034 handle_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1036 /* Read the list of variables. */
1037 var = xmalloc (n_vars * sizeof *var);
1038 for (i = 0; i < n_vars; i++)
1043 /* Read variable index, check range. */
/* Indexes in the file are 1-based positions within the case, so they
   are checked against value_cnt and looked up in var_by_idx with
   one subtracted. */
1044 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1045 if (r->reverse_endian)
1046 bswap_int32 (&var_idx);
1047 if (var_idx < 1 || var_idx > r->value_cnt)
1048 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1049 "not between 1 and the number of values (%d)."),
1050 handle_get_filename (r->fh), var_idx, r->value_cnt));
1052 /* Make sure it's a real variable. */
1053 v = var_by_idx[var_idx - 1];
1055 lose ((ME, _("%s: Variable index associated with value label (%d) "
1056 "refers to a continuation of a string variable, not to "
1057 "an actual variable."),
1058 handle_get_filename (r->fh), var_idx));
1059 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1060 lose ((ME, _("%s: Value labels are not allowed on long string "
1062 handle_get_filename (r->fh), v->name));
1064 /* Add it to the list of variables. */
1068 /* Type check the variables. */
1069 for (i = 1; i < n_vars; i++)
1070 if (var[i]->type != var[0]->type)
1071 lose ((ME, _("%s: Variables associated with value label are not all of "
1072 "identical type. Variable %s has %s type, but variable "
1074 handle_get_filename (r->fh),
1075 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1076 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1078 /* Fill in labels[].value, now that we know the desired type. */
1079 for (i = 0; i < n_labels; i++)
1081 struct label *label = labels + i;
1083 if (var[0]->type == ALPHA)
/* NOTE(review): label->label is declared above as `char *', so
   sizeof (label->label) is the size of a pointer, not of the label
   text or of the destination.  Where pointers are 4 bytes this
   copies only 4 of the 8 raw value bytes.  Presumably the intended
   bound is the size of label->value.s -- confirm. */
1085 const int copy_len = min (sizeof (label->raw_value),
1086 sizeof (label->label));
1087 memcpy (label->value.s, label->raw_value, copy_len);
1090 assert (sizeof f == sizeof label->raw_value);
1091 memcpy (&f, label->raw_value, sizeof f);
1092 if (r->reverse_endian)
1098 /* Assign the value_label's to each variable. */
1099 for (i = 0; i < n_vars; i++)
1101 struct variable *v = var[i];
1104 /* Add each label to the variable. */
/* A false result from val_labs_replace is reported as a duplicate
   label, with a warning only. */
1105 for (j = 0; j < n_labels; j++)
1107 struct label *label = labels + j;
1108 if (!val_labs_replace (v->val_labs, label->value, label->label))
1111 if (var[0]->type == NUMERIC)
1112 msg (MW, _("%s: File contains duplicate label for value %g for "
1114 handle_get_filename (r->fh), label->value.f, v->name);
1116 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1117 "for variable %s."),
1118 handle_get_filename (r->fh), v->width, label->value.s, v->name);
1122 for (i = 0; i < n_labels; i++)
1123 free (labels[i].label);
1131 for (i = 0; i < n_labels; i++)
1132 free (labels[i].label);
/* Central read helper for the dictionary reader.  When BUF is null
   and BYTE_CNT is nonzero it allocates the larger of BYTE_CNT and
   MIN_ALLOC bytes, which lets callers reserve room for a terminator.
   The data is read as a single item of BYTE_CNT bytes; a short read
   is reported either as an I/O error with strerror (errno) or, via
   corrupt_msg, as an unexpected end of file.
   The header comment below says "represented by H"; the reader
   parameter here is R.
   NOTE(review): what the BYTE_CNT == 0 branch returns, whether a
   buffer allocated here is freed when the read fails, and the return
   statements are on lines not visible in this listing. */
1139 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1140 non-NULL, uses that as the buffer; otherwise allocates at least
1141 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1144 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1148 if (buf == NULL && byte_cnt > 0 )
1149 buf = xmalloc (max (byte_cnt, min_alloc));
1151 if ( byte_cnt == 0 )
1155 if (1 != fread (buf, byte_cnt, 1, r->file))
1157 if (ferror (r->file))
1158 msg (ME, _("%s: Reading system file: %s."),
1159 handle_get_filename (r->fh), strerror (errno));
1161 corrupt_msg (ME, _("%s: Unexpected end of file."),
1162 handle_get_filename (r->fh));
/* Reads a type 6 record: a line count followed by that many 80-byte
   lines, stored as one string in the dictionary.  A second document
   record in the same file is an error.  buf_read is asked for one
   byte more than it reads, so the terminator store at index
   80 * line_cnt stays inside the allocation.
   NOTE(review): no byte swap of line_cnt for reverse-endian files is
   visible between the read and its use -- confirm whether one is
   needed.
   NOTE(review): no upper bound on line_cnt is visible, so
   80 * line_cnt is computed from an unchecked file value. */
1168 /* Reads a document record, type 6, from system file R, and sets up
1169 the documents and n_documents fields in the associated
1172 read_documents (struct sfm_reader *r, struct dictionary *dict)
1177 if (dict_get_documents (dict) != NULL)
1178 lose ((ME, _("%s: System file contains multiple "
1179 "type 6 (document) records."),
1180 handle_get_filename (r->fh)));
1182 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1184 lose ((ME, _("%s: Number of document lines (%ld) "
1185 "must be greater than 0."),
1186 handle_get_filename (r->fh), (long) line_cnt));
1188 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1189 /* FIXME? Run through asciify. */
1190 if (documents == NULL)
1192 documents[80 * line_cnt] = '\0';
1193 dict_set_documents (dict, documents);
/* Refills the decompression buffer used by read_compressed_data.
   The buffer holds 128 flt64 elements; fread may return fewer, and
   r->end is set one past the last element actually read.  A stream
   error is reported with strerror (errno).
   The header comment below says "H->BUF"; the reader parameter here
   is R.
   NOTE(review): the condition guarding the allocation, the reset of
   r->ptr, and the return statements are on lines not visible in this
   listing. */
1203 /* Reads compressed data into H->BUF and sets other pointers
1204 appropriately. Returns nonzero only if both no errors occur and
1207 buffer_input (struct sfm_reader *r)
1212 r->buf = xmalloc (sizeof *r->buf * 128);
1213 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1214 if (ferror (r->file))
1216 msg (ME, _("%s: Error reading file: %s."),
1217 handle_get_filename (r->fh), strerror (errno));
1221 r->end = &r->buf[amt];
1225 /* Reads a single case consisting of compressed data from system
1226 file H into the array BUF[] according to reader R, and
1227 returns nonzero only if successful. */
1228 /* Data in system files is compressed in this manner. Data
1229 values are grouped into sets of eight ("octets"). Each value
1230 in an octet has one instruction byte that are output together.
1231 Each instruction byte gives a value for that byte or indicates
1232 that the value can be found following the instructions. */
/* Decoding resumes at r->y, the position within the current
   instruction octet r->x where the previous case stopped, and
   continues until r->value_cnt values have been produced.
   Instruction bytes, as handled below:
     0        ignored
     252      end of file
     253      value stored explicitly in the data stream
     254      eight blanks
     255      system-missing
     1..251   the value (byte - bias)
   When an octet is used up, the next eight bytes of the stream
   become the new instruction octet.
   NOTE(review): the switch and case labels, braces, and the
   bookkeeping at the two exits are on lines not visible in this
   listing. */
1234 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1236 const unsigned char *p_end = r->x + sizeof (flt64);
1237 unsigned char *p = r->y;
1239 const flt64 *buf_beg = buf;
1240 const flt64 *buf_end = &buf[r->value_cnt];
1244 for (; p < p_end; p++)
1248 /* Code 0 is ignored. */
1251 /* Code 252 is end of file. */
1253 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1254 "in partial case."),
1255 handle_get_filename (r->fh)));
1258 /* Code 253 indicates that the value is stored explicitly
1259 following the instruction bytes. */
1260 if (r->ptr == NULL || r->ptr >= r->end)
1261 if (!buffer_input (r))
1263 lose ((ME, _("%s: Unexpected end of file."),
1264 handle_get_filename (r->fh)));
1267 memcpy (buf++, r->ptr++, sizeof *buf);
1272 /* Code 254 indicates a string that is all blanks. */
1273 memset (buf++, ' ', sizeof *buf);
1278 /* Code 255 indicates the system-missing value. */
1280 if (r->reverse_endian)
1287 /* Codes 1 through 251 inclusive are taken to indicate a
1288 value of (BYTE - BIAS), where BYTE is the byte's value
1289 and BIAS is the compression bias (generally 100.0). */
1290 *buf = *p - r->bias;
1291 if (r->reverse_endian)
1299 /* We have reached the end of this instruction octet. Read
1301 if (r->ptr == NULL || r->ptr >= r->end)
1302 if (!buffer_input (r))
1305 lose ((ME, _("%s: Unexpected end of file."),
1306 handle_get_filename (r->fh)));
1309 memcpy (r->x, r->ptr++, sizeof *buf);
1317 /* We have filled up an entire record. Update state and return
1323 /* We have been unsuccessful at filling a record, either through i/o
1324 error or through an end-of-file indication. Update state and
1325 return unsuccessfully. */
1329 /* Reads one case from READER's file into C. Returns nonzero
1330 only if successful. */
/* Two strategies are visible.  For uncompressed files on builds
   where flt64 and double have the same size, the case is read
   straight into C's storage and numeric values are then byte-swapped
   and SYSMIS-translated in place.  Otherwise the case is read (or
   decompressed) into a temporary bounce buffer and copied into C one
   variable at a time.
   NOTE(review): the fast path reads sizeof (union value) bytes per
   value while the guard only compares flt64 with double; that
   assumes union value is the same size -- TODO confirm.
   NOTE(review): the fast-path SYSMIS test calls case_num (c, i) with
   the loop index, while the store on the next line and the byte-swap
   loop index the case with r->vars[i].fv.  The two differ whenever a
   string variable wider than one value precedes variable i --
   confirm which is intended. */
1332 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1334 if (!r->compressed && sizeof (flt64) == sizeof (double))
1336 /* Fast path: external and internal representations are the
1337 same, except possibly for endianness or SYSMIS. Read
1338 directly into the case's buffer, then fix up any minor
1339 details as needed. */
1340 if (!fread_ok (r, case_data_all_rw (c),
1341 sizeof (union value) * r->value_cnt))
1344 /* Fix up endianness if needed. */
1345 if (r->reverse_endian)
1349 for (i = 0; i < r->var_cnt; i++)
1350 if (r->vars[i].width == 0)
1351 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1354 /* Fix up SYSMIS values if needed.
1355 I don't think this will ever actually kick in, but it
1357 if (r->sysmis != SYSMIS)
1361 for (i = 0; i < r->var_cnt; i++)
1362 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1363 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1368 /* Slow path: internal and external representations differ.
1369 Read into a bounce buffer, then copy to C. */
1376 bounce_size = sizeof *bounce * r->value_cnt;
1377 bounce = bounce_cur = local_alloc (bounce_size);
1380 read_ok = fread_ok (r, bounce, bounce_size);
1382 read_ok = read_compressed_data (r, bounce);
/* The bounce buffer is released on the failed-read path here and
   again after the copy loop at the end. */
1385 local_free (bounce);
1389 for (i = 0; i < r->var_cnt; i++)
1391 struct sfm_var *v = &r->vars[i];
/* NOTE(review): the numeric store below indexes the case with i,
   whereas the string copy after it uses v->fv -- the same
   inconsistency as in the fast path. */
1395 flt64 f = *bounce_cur++;
1396 if (r->reverse_endian)
1398 case_data_rw (c, i)->f = f == r->sysmis ? SYSMIS : f;
1402 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1403 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1407 local_free (bounce);
/* Reads BYTE_CNT bytes from R's file into BUFFER and tells the
   caller whether all of them arrived (sfm_read_case tests the result
   with `!').  When the read falls short, a stream error is reported
   with strerror (errno) and a nonzero partial read is reported as a
   truncated record.  A read of zero bytes without a stream error
   produces no message in the visible lines, which treats a clean end
   of file as the normal end of data.
   NOTE(review): the return statements are on lines not visible in
   this listing. */
1413 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1415 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1417 if (read_bytes == byte_cnt)
1421 if (ferror (r->file))
1422 msg (ME, _("%s: Reading system file: %s."),
1423 handle_get_filename (r->fh), strerror (errno));
1424 else if (read_bytes != 0)
1425 msg (ME, _("%s: Partial record at end of system file."),
1426 handle_get_filename (r->fh));