1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <libpspp/alloc.h>
28 #include <libpspp/message.h>
29 #include <libpspp/compiler.h>
30 #include <libpspp/magic.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/str.h>
33 #include <libpspp/hash.h>
35 #include "sys-file-reader.h"
36 #include "sfm-private.h"
38 #include "dictionary.h"
39 #include "file-handle-def.h"
40 #include "file-name.h"
42 #include "value-labels.h"
47 #define _(msgid) gettext (msgid)
49 /* System file reader. */
52 struct file_handle *fh; /* File handle. */
53 FILE *file; /* File stream. */
55 int reverse_endian; /* 1=file has endianness opposite us. */
56 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
57 int value_cnt; /* Number of `union values's per case. */
58 long case_cnt; /* Number of cases, -1 if unknown. */
59 int compressed; /* 1=compressed, 0=not compressed. */
60 double bias; /* Compression bias, usually 100.0. */
61 int weight_idx; /* 0-based index of weighting variable, or -1. */
62 bool ok; /* False after an I/O error or corrupt data. */
65 struct sfm_var *vars; /* Variables. */
67 /* File's special constants. */
72 /* Decompression buffer. */
73 flt64 *buf; /* Buffer data. */
74 flt64 *ptr; /* Current location in buffer. */
75 flt64 *end; /* End of buffer data. */
77 /* Compression instruction octet. */
78 unsigned char x[8]; /* Current instruction octet. */
79 unsigned char *y; /* Location in current instruction octet. */
82 /* A variable in a system file. */
85 int width; /* 0=numeric, otherwise string width. */
86 int fv; /* Index into case. */
91 /* Swap bytes *A and *B. */
93 bswap (char *a, char *b)
100 /* Reverse the byte order of 32-bit integer *X. */
102 bswap_int32 (int32_t *x_)
104 char *x = (char *) x_;
105 bswap (x + 0, x + 3);
106 bswap (x + 1, x + 2);
109 /* Reverse the byte order of 64-bit floating point *X. */
111 bswap_flt64 (flt64 *x_)
113 char *x = (char *) x_;
114 bswap (x + 0, x + 7);
115 bswap (x + 1, x + 6);
116 bswap (x + 2, x + 5);
117 bswap (x + 3, x + 4);
121 corrupt_msg (int class, const char *format,...)
122 PRINTF_FORMAT (2, 3);
124 /* Displays a corrupt sysfile error. */
126 corrupt_msg (int class, const char *format,...)
132 ds_create (&text, _("corrupt system file: "));
133 va_start (args, format);
134 ds_vprintf (&text, format, args);
137 m.category = msg_class_to_category (class);
138 m.severity = msg_class_to_severity (class);
139 m.where.file_name = NULL;
140 m.where.line_number = 0;
141 m.text = ds_c_str (&text);
146 /* Closes a system file after we're done with it. */
148 sfm_close_reader (struct sfm_reader *r)
155 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
156 msg (ME, _("%s: Closing system file: %s."),
157 fh_get_file_name (r->fh), strerror (errno));
162 fh_close (r->fh, "system file", "rs");
169 /* Dictionary reader. */
171 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
173 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
176 static int read_header (struct sfm_reader *,
177 struct dictionary *, struct sfm_read_info *);
178 static int parse_format_spec (struct sfm_reader *, int32_t,
179 struct fmt_spec *, const struct variable *);
180 static int read_value_labels (struct sfm_reader *, struct dictionary *,
181 struct variable **var_by_idx);
182 static int read_variables (struct sfm_reader *,
183 struct dictionary *, struct variable ***var_by_idx);
184 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
185 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
186 static int read_documents (struct sfm_reader *, struct dictionary *);
188 static int fread_ok (struct sfm_reader *, void *, size_t);
190 /* Displays the message X with corrupt_msg, then jumps to the error
198 /* Calls buf_read with the specified arguments, and jumps to
199 error if the read fails. */
200 #define assertive_buf_read(a,b,c,d) \
202 if (!buf_read (a,b,c,d)) \
214 pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED)
216 const struct name_pair *p1 = _p1;
217 const struct name_pair *p2 = _p2;
219 return strcmp(p1->shortname, p2->shortname);
223 pair_sn_hash(const void *_p, void *aux UNUSED)
225 const struct name_pair *p = _p;
226 return hsh_hash_bytes(p->shortname, strlen(p->shortname));
230 pair_sn_free(void *p, void *aux UNUSED)
236 /* Opens the system file designated by file handle FH for
237 reading. Reads the system file's dictionary into *DICT.
238 If INFO is non-null, then it receives additional info about the
241 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
242 struct sfm_read_info *info)
244 struct sfm_reader *r = NULL;
245 struct variable **var_by_idx = NULL;
247 /* A hash table of long variable names indexed by short name */
248 struct hsh_table *short_to_long = NULL;
250 *dict = dict_create ();
251 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
254 /* Create and initialize reader. */
255 r = xmalloc (sizeof *r);
257 r->file = fn_open (fh_get_file_name (fh), "rb");
259 r->reverse_endian = 0;
270 r->sysmis = -FLT64_MAX;
271 r->highest = FLT64_MAX;
272 r->lowest = second_lowest_flt64;
274 r->buf = r->ptr = r->end = NULL;
275 r->y = r->x + sizeof r->x;
277 /* Check that file open succeeded. */
280 msg (ME, _("An error occurred while opening \"%s\" for reading "
281 "as a system file: %s."),
282 fh_get_file_name (r->fh), strerror (errno));
286 /* Read header and variables. */
287 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
291 /* Handle weighting. */
292 if (r->weight_idx != -1)
294 struct variable *weight_var;
296 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
297 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
298 "and number of elements per case (%d)."),
299 fh_get_file_name (r->fh), r->weight_idx, r->value_cnt));
302 weight_var = var_by_idx[r->weight_idx];
304 if (weight_var == NULL)
306 _("%s: Weighting variable may not be a continuation of "
307 "a long string variable."), fh_get_file_name (fh)));
308 else if (weight_var->type == ALPHA)
309 lose ((ME, _("%s: Weighting variable may not be a string variable."),
310 fh_get_file_name (fh)));
312 dict_set_weight (*dict, weight_var);
315 dict_set_weight (*dict, NULL);
317 /* Read records of types 3, 4, 6, and 7. */
322 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
323 if (r->reverse_endian)
324 bswap_int32 (&rec_type);
330 if (!read_value_labels (r, *dict, var_by_idx))
335 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
336 "records must always immediately follow type 3 "
338 fh_get_file_name (r->fh)));
341 if (!read_documents (r, *dict))
358 assertive_buf_read (r, &data, sizeof data, 0);
359 if (r->reverse_endian)
361 bswap_int32 (&data.subtype);
362 bswap_int32 (&data.size);
363 bswap_int32 (&data.count);
365 bytes = data.size * data.count;
367 if (bytes < data.size || bytes < data.count)
368 lose ((ME, "%s: Record type %d subtype %d too large.",
369 fh_get_file_name (r->fh), rec_type, data.subtype));
371 switch (data.subtype)
374 if (!read_machine_int32_info (r, data.size, data.count))
379 if (!read_machine_flt64_info (r, data.size, data.count))
384 case 6: /* ?? Used by SPSS 8.0. */
388 case 11: /* Variable display parameters */
390 const int n_vars = data.count / 3 ;
392 if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) )
394 msg (MW, _("%s: Invalid subrecord length. "
395 "Record: 7; Subrecord: 11"),
396 fh_get_file_name (r->fh));
401 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
413 assertive_buf_read (r, &params, sizeof(params), 0);
415 v = dict_get_var(*dict, i);
417 v->measure = params.measure;
418 v->display_width = params.width;
419 v->alignment = params.align;
424 case 13: /* SPSS 12.0 Long variable name map */
426 char *buf, *short_name, *save_ptr;
430 buf = xmalloc (bytes + 1);
431 if (!buf_read (r, buf, bytes, 0))
438 short_to_long = hsh_create(4,
445 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
447 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
449 struct name_pair *pair ;
450 char *long_name = strtok_r (NULL, "\t", &save_ptr);
453 /* Validate long name. */
454 if (long_name == NULL)
456 msg (MW, _("%s: Trailing garbage in long variable "
458 fh_get_file_name (r->fh));
461 if (!var_is_valid_name (long_name, false))
463 msg (MW, _("%s: Long variable mapping to invalid "
464 "variable name `%s'."),
465 fh_get_file_name (r->fh), long_name);
469 /* Find variable using short name. */
470 v = dict_lookup_var (*dict, short_name);
473 msg (MW, _("%s: Long variable mapping for "
474 "nonexistent variable %s."),
475 fh_get_file_name (r->fh), short_name);
479 /* Identify any duplicates. */
480 if ( compare_var_names(short_name, long_name, 0) &&
481 NULL != dict_lookup_var (*dict, long_name))
482 lose ((ME, _("%s: Duplicate long variable name `%s' "
483 "within system file."),
484 fh_get_file_name (r->fh), long_name));
488 Renaming a variable may clear the short
489 name, but we want to retain it, so
490 re-set it explicitly. */
491 dict_rename_var (*dict, v, long_name);
492 var_set_short_name (v, short_name);
494 pair = xmalloc(sizeof *pair);
495 pair->shortname = short_name;
496 pair->longname = long_name;
497 hsh_insert(short_to_long, pair);
499 /* This messes up the processing of subtype 14 (below).
500 I'm not sure if it is needed anyway, so I'm removing it for
501 now. If it's needed, then it will need to be done after all the
502 records have been processed. --- JMD 27 April 2006
505 /* For compatibility, make sure dictionary
506 is in long variable name map order. In
507 the common case, this has no effect,
508 because the dictionary and the long
509 variable name map are already in the
511 dict_reorder_var (*dict, v, idx);
524 bool eq_seen = false;
528 char *buf = xmalloc (bytes + 1);
529 if (!buf_read (r, buf, bytes, 0))
537 /* Note: SPSS v13 terminates this record with 00,
538 whereas SPSS v14 terminates it with 00 09. We must
540 for(i = 0; i < bytes ; ++i)
543 static char name[SHORT_NAME_LEN + 1];
544 static char len_str[6];
553 length = strtol(len_str, 0, 10);
554 if ( length != LONG_MAX && length != LONG_MIN)
556 char *lookup_name = name;
563 struct name_pair pair;
566 pair.shortname = name;
567 p = hsh_find(short_to_long, &pair);
569 lookup_name = p->longname;
573 v = dict_lookup_var(*dict, lookup_name);
577 _("%s: No variable called %s but it is listed in length table."),
578 fh_get_file_name (r->fh), lookup_name);
585 if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
586 l -= EFFECTIVE_LONG_STRING_LENGTH;
593 struct variable *v_next;
594 v_next = dict_get_var(*dict, idx + 1);
596 if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
597 l -= EFFECTIVE_LONG_STRING_LENGTH;
601 dict_delete_var(*dict, v_next);
605 v->print.w = v->width;
606 v->write.w = v->width;
609 memset(name, 0, SHORT_NAME_LEN+1);
610 memset(len_str, 0, 6);
629 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
630 "encountered in system file."),
631 fh_get_file_name (r->fh), data.subtype);
637 void *x = buf_read (r, NULL, data.size * data.count, 0);
649 assertive_buf_read (r, &filler, sizeof filler, 0);
655 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
656 fh_get_file_name (r->fh), rec_type);
661 /* Come here on successful completion. */
665 hsh_destroy(short_to_long);
669 /* Come here on unsuccessful completion. */
670 sfm_close_reader (r);
672 hsh_destroy(short_to_long);
675 dict_destroy (*dict);
681 /* Read record type 7, subtype 3. */
683 read_machine_int32_info (struct sfm_reader *r, int size, int count)
690 if (size != sizeof (int32_t) || count != 8)
691 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
692 "subtype 3. Expected size %d, count 8."),
693 fh_get_file_name (r->fh), size, count, sizeof (int32_t)));
695 assertive_buf_read (r, data, sizeof data, 0);
696 if (r->reverse_endian)
697 for (i = 0; i < 8; i++)
698 bswap_int32 (&data[i]);
702 lose ((ME, _("%s: Floating-point representation in system file is not "
703 "IEEE-754. PSPP cannot convert between floating-point "
705 fh_get_file_name (r->fh)));
707 #error Add support for your floating-point format.
710 #ifdef WORDS_BIGENDIAN
715 if (r->reverse_endian)
717 if (file_bigendian ^ (data[6] == 1))
718 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
719 "endianness intuited from file header (%s)."),
720 fh_get_file_name (r->fh),
721 file_bigendian ? _("big-endian") : _("little-endian"),
722 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
725 /* PORTME: Character representation code. */
726 if (data[7] != 2 && data[7] != 3)
727 lose ((ME, _("%s: File-indicated character representation code (%s) is "
729 fh_get_file_name (r->fh),
730 (data[7] == 1 ? "EBCDIC"
731 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
739 /* Read record type 7, subtype 4. */
741 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
746 if (size != sizeof (flt64) || count != 3)
747 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
748 "subtype 4. Expected size %d, count 8."),
749 fh_get_file_name (r->fh), size, count, sizeof (flt64)));
751 assertive_buf_read (r, data, sizeof data, 0);
752 if (r->reverse_endian)
753 for (i = 0; i < 3; i++)
754 bswap_flt64 (&data[i]);
756 if (data[0] != SYSMIS || data[1] != FLT64_MAX
757 || data[2] != second_lowest_flt64)
760 r->highest = data[1];
762 msg (MW, _("%s: File-indicated value is different from internal value "
763 "for at least one of the three system values. SYSMIS: "
764 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
766 fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS,
767 (double) data[1], (double) FLT64_MAX,
768 (double) data[2], (double) second_lowest_flt64);
778 read_header (struct sfm_reader *r,
779 struct dictionary *dict, struct sfm_read_info *info)
781 struct sysfile_header hdr; /* Disk buffer. */
782 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
783 int skip_amt = 0; /* Amount of product name to omit. */
786 /* Read header, check magic. */
787 assertive_buf_read (r, &hdr, sizeof hdr, 0);
788 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
789 lose ((ME, _("%s: Bad magic. Proper system files begin with "
790 "the four characters `$FL2'. This file will not be read."),
791 fh_get_file_name (r->fh)));
793 /* Check eye-catcher string. */
794 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
795 for (i = 0; i < 60; i++)
796 if (!c_isprint ((unsigned char) prod_name[i]))
798 for (i = 59; i >= 0; i--)
799 if (!c_isgraph ((unsigned char) prod_name[i]))
804 prod_name[60] = '\0';
808 static const char *prefix[N_PREFIXES] =
810 "@(#) SPSS DATA FILE",
816 for (i = 0; i < N_PREFIXES; i++)
817 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
819 skip_amt = strlen (prefix[i]);
824 /* Check endianness. */
825 if (hdr.layout_code == 2)
826 r->reverse_endian = 0;
829 bswap_int32 (&hdr.layout_code);
830 if (hdr.layout_code != 2)
831 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
832 "should be 2, in big-endian or little-endian format."),
833 fh_get_file_name (r->fh), hdr.layout_code));
835 r->reverse_endian = 1;
836 bswap_int32 (&hdr.nominal_case_size);
837 bswap_int32 (&hdr.compress);
838 bswap_int32 (&hdr.weight_idx);
839 bswap_int32 (&hdr.case_cnt);
840 bswap_flt64 (&hdr.bias);
844 /* Copy basic info and verify correctness. */
845 r->value_cnt = hdr.nominal_case_size;
847 /* If value count is ridiculous, then force it to -1 (a sentinel value). */
848 if ( r->value_cnt < 0 ||
849 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
852 r->compressed = hdr.compress;
854 r->weight_idx = hdr.weight_idx - 1;
856 r->case_cnt = hdr.case_cnt;
857 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
859 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
860 fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2));
863 if (r->bias != 100.0)
864 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
866 fh_get_file_name (r->fh), r->bias);
868 /* Make a file label only on the condition that the given label is
869 not all spaces or nulls. */
873 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
875 if (!c_isspace ((unsigned char) hdr.file_label[i])
876 && hdr.file_label[i] != 0)
878 char *label = xmalloc (i + 2);
879 memcpy (label, hdr.file_label, i + 1);
881 dict_set_label (dict, label);
892 memcpy (info->creation_date, hdr.creation_date, 9);
893 info->creation_date[9] = 0;
895 memcpy (info->creation_time, hdr.creation_time, 8);
896 info->creation_time[8] = 0;
898 #ifdef WORDS_BIGENDIAN
899 info->big_endian = !r->reverse_endian;
901 info->big_endian = r->reverse_endian;
904 info->compressed = hdr.compress;
906 info->case_cnt = hdr.case_cnt;
908 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
909 if (c_isgraph ((unsigned char) *cp))
911 strcpy (info->product, cp);
920 /* Reads most of the dictionary from R's file; also fills in the
921 associated VAR_BY_IDX array. */
923 read_variables (struct sfm_reader *r,
924 struct dictionary *dict, struct variable ***var_by_idx)
928 struct sysfile_variable sv; /* Disk buffer. */
929 int long_string_count = 0; /* # of long string continuation
930 records still expected. */
931 int next_value = 0; /* Index to next `value' structure. */
937 /* Pre-allocate variables. */
938 if (r->value_cnt != -1)
940 *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
941 r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
945 /* Read in the entry for each variable and use the info to
946 initialize the dictionary. */
950 char name[SHORT_NAME_LEN + 1];
954 assertive_buf_read (r, &sv, sizeof sv, 0);
956 if (r->reverse_endian)
958 bswap_int32 (&sv.rec_type);
959 bswap_int32 (&sv.type);
960 bswap_int32 (&sv.has_var_label);
961 bswap_int32 (&sv.n_missing_values);
962 bswap_int32 (&sv.print);
963 bswap_int32 (&sv.write);
966 /* We've come to the end of the variable entries */
967 if (sv.rec_type != 2)
969 buf_unread(r, sizeof sv);
974 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
975 r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
977 /* If there was a long string previously, make sure that the
978 continuations are present; otherwise make sure there aren't
980 if (long_string_count)
983 lose ((ME, _("%s: position %d: String variable does not have "
984 "proper number of continuation records."),
985 fh_get_file_name (r->fh), i));
988 r->vars[i].width = -1;
989 (*var_by_idx)[i] = NULL;
993 else if (sv.type == -1)
994 lose ((ME, _("%s: position %d: Superfluous long string continuation "
996 fh_get_file_name (r->fh), i));
998 /* Check fields for validity. */
999 if (sv.type < 0 || sv.type > 255)
1000 lose ((ME, _("%s: position %d: Bad variable type code %d."),
1001 fh_get_file_name (r->fh), i, sv.type));
1002 if (sv.has_var_label != 0 && sv.has_var_label != 1)
1003 lose ((ME, _("%s: position %d: Variable label indicator field is not "
1004 "0 or 1."), fh_get_file_name (r->fh), i));
1005 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
1006 || sv.n_missing_values == -1)
1007 lose ((ME, _("%s: position %d: Missing value indicator field is not "
1008 "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i));
1010 /* Copy first character of variable name. */
1011 if (sv.name[0] == '@' || sv.name[0] == '#')
1012 lose ((ME, _("%s: position %d: Variable name begins with invalid "
1014 fh_get_file_name (r->fh), i));
1016 name[0] = sv.name[0];
1018 /* Copy remaining characters of variable name. */
1019 for (j = 1; j < SHORT_NAME_LEN; j++)
1021 int c = (unsigned char) sv.name[j];
1030 if ( ! var_is_plausible_name(name, false) )
1031 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
1032 fh_get_file_name (r->fh), name));
1034 /* Create variable. */
1035 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
1037 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
1038 fh_get_file_name (r->fh), name));
1040 /* Set the short name the same as the long name */
1041 var_set_short_name (vv, vv->name);
1043 /* Case reading data. */
1044 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
1045 long_string_count = nv - 1;
1048 /* Get variable label, if any. */
1049 if (sv.has_var_label == 1)
1054 /* Read length of label. */
1055 assertive_buf_read (r, &len, sizeof len, 0);
1056 if (r->reverse_endian)
1060 if (len < 0 || len > 255)
1061 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
1063 fh_get_file_name (r->fh), vv->name, len));
1067 /* Read label into variable structure. */
1068 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
1069 if (vv->label == NULL)
1071 vv->label[len] = '\0';
1075 /* Set missing values. */
1076 if (sv.n_missing_values != 0)
1079 int mv_cnt = abs (sv.n_missing_values);
1081 if (vv->width > MAX_SHORT_STRING)
1082 lose ((ME, _("%s: Long string variable %s may not have missing "
1084 fh_get_file_name (r->fh), vv->name));
1086 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
1088 if (r->reverse_endian && vv->type == NUMERIC)
1089 for (j = 0; j < mv_cnt; j++)
1090 bswap_flt64 (&mv[j]);
1092 if (sv.n_missing_values > 0)
1094 for (j = 0; j < sv.n_missing_values; j++)
1095 if (vv->type == NUMERIC)
1096 mv_add_num (&vv->miss, mv[j]);
1098 mv_add_str (&vv->miss, (char *) &mv[j]);
1102 if (vv->type == ALPHA)
1103 lose ((ME, _("%s: String variable %s may not have missing "
1104 "values specified as a range."),
1105 fh_get_file_name (r->fh), vv->name));
1107 if (mv[0] == r->lowest)
1108 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
1109 else if (mv[1] == r->highest)
1110 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
1112 mv_add_num_range (&vv->miss, mv[0], mv[1]);
1114 if (sv.n_missing_values == -3)
1115 mv_add_num (&vv->miss, mv[2]);
1119 if (!parse_format_spec (r, sv.print, &vv->print, vv)
1120 || !parse_format_spec (r, sv.write, &vv->write, vv))
1123 r->vars[i].width = vv->width;
1124 r->vars[i].fv = vv->fv;
1128 /* Some consistency checks. */
1129 if (long_string_count != 0)
1130 lose ((ME, _("%s: Long string continuation records omitted at end of "
1132 fh_get_file_name (r->fh)));
1134 if (next_value != r->value_cnt)
1135 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
1136 "%d were read from file."),
1137 fh_get_file_name (r->fh), r->value_cnt, next_value);
1146 /* Translates the format spec from sysfile format to internal
1149 parse_format_spec (struct sfm_reader *r, int32_t s,
1150 struct fmt_spec *f, const struct variable *v)
1152 f->type = translate_fmt ((s >> 16) & 0xff);
1154 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1155 fh_get_file_name (r->fh), (s >> 16) & 0xff));
1156 f->w = (s >> 8) & 0xff;
1159 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1160 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1161 fh_get_file_name (r->fh),
1162 v->type == ALPHA ? _("String") : _("Numeric"),
1164 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1165 formats[f->type].name));
1167 if (!check_output_specifier (f, false)
1168 || !check_specifier_width (f, v->width, false))
1170 msg (ME, _("%s variable %s has invalid format specifier %s."),
1171 v->type == NUMERIC ? _("Numeric") : _("String"),
1172 v->name, fmt_to_string (f));
1173 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1181 /* Reads value labels from R's system file and inserts them into the
1182 associated dictionary. */
1184 read_value_labels (struct sfm_reader *r,
1185 struct dictionary *dict, struct variable **var_by_idx)
1189 char raw_value[8]; /* Value as uninterpreted bytes. */
1190 union value value; /* Value. */
1191 char *label; /* Null-terminated label string. */
1194 struct label *labels = NULL;
1195 int32_t n_labels; /* Number of labels. */
1197 struct variable **var = NULL; /* Associated variables. */
1198 int32_t n_vars; /* Number of associated variables. */
1202 /* First step: read the contents of the type 3 record and record its
1203 contents. Note that we can't do much with the data since we
1204 don't know yet whether it is of numeric or string type. */
1206 /* Read number of labels. */
1207 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1208 if (r->reverse_endian)
1209 bswap_int32 (&n_labels);
1211 if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
1213 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1214 fh_get_file_name (r->fh), n_labels);
1218 /* Allocate memory. */
1219 labels = xcalloc (n_labels, sizeof *labels);
1220 for (i = 0; i < n_labels; i++)
1221 labels[i].label = NULL;
1223 /* Read each value/label tuple into labels[]. */
1224 for (i = 0; i < n_labels; i++)
1226 struct label *label = labels + i;
1227 unsigned char label_len;
1231 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1233 /* Read label length. */
1234 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1235 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1237 /* Read label, padding. */
1238 label->label = xmalloc (padded_len + 1);
1239 assertive_buf_read (r, label->label, padded_len - 1, 0);
1240 label->label[label_len] = 0;
1243 /* Second step: Read the type 4 record that has the list of
1244 variables to which the value labels are to be applied. */
1246 /* Read record type of type 4 record. */
1250 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1251 if (r->reverse_endian)
1252 bswap_int32 (&rec_type);
1255 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1256 "follow value label record (type 3) as it should."),
1257 fh_get_file_name (r->fh)));
1260 /* Read number of variables associated with value label from type 4
1262 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1263 if (r->reverse_endian)
1264 bswap_int32 (&n_vars);
1265 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1266 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1267 "is not between 1 and the number of variables (%d)."),
1268 fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict)));
1270 /* Read the list of variables. */
1271 var = xnmalloc (n_vars, sizeof *var);
1272 for (i = 0; i < n_vars; i++)
1277 /* Read variable index, check range. */
1278 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1279 if (r->reverse_endian)
1280 bswap_int32 (&var_idx);
1281 if (var_idx < 1 || var_idx > r->value_cnt)
1282 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1283 "not between 1 and the number of values (%d)."),
1284 fh_get_file_name (r->fh), var_idx, r->value_cnt));
1286 /* Make sure it's a real variable. */
1287 v = var_by_idx[var_idx - 1];
1289 lose ((ME, _("%s: Variable index associated with value label (%d) "
1290 "refers to a continuation of a string variable, not to "
1291 "an actual variable."),
1292 fh_get_file_name (r->fh), var_idx));
1293 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1294 lose ((ME, _("%s: Value labels are not allowed on long string "
1296 fh_get_file_name (r->fh), v->name));
1298 /* Add it to the list of variables. */
1302 /* Type check the variables. */
1303 for (i = 1; i < n_vars; i++)
1304 if (var[i]->type != var[0]->type)
1305 lose ((ME, _("%s: Variables associated with value label are not all of "
1306 "identical type. Variable %s has %s type, but variable "
1308 fh_get_file_name (r->fh),
1309 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1310 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1312 /* Fill in labels[].value, now that we know the desired type. */
1313 for (i = 0; i < n_labels; i++)
1315 struct label *label = labels + i;
1317 if (var[0]->type == ALPHA)
1319 const int copy_len = min (sizeof label->raw_value,
1320 sizeof label->label);
1321 memcpy (label->value.s, label->raw_value, copy_len);
1324 assert (sizeof f == sizeof label->raw_value);
1325 memcpy (&f, label->raw_value, sizeof f);
1326 if (r->reverse_endian)
1332 /* Assign the value_label's to each variable. */
1333 for (i = 0; i < n_vars; i++)
1335 struct variable *v = var[i];
1338 /* Add each label to the variable. */
1339 for (j = 0; j < n_labels; j++)
1341 struct label *label = labels + j;
1342 if (!val_labs_replace (v->val_labs, label->value, label->label))
1345 if (var[0]->type == NUMERIC)
1346 msg (MW, _("%s: File contains duplicate label for value %g for "
1348 fh_get_file_name (r->fh), label->value.f, v->name);
1350 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1351 "for variable %s."),
1352 fh_get_file_name (r->fh), v->width, label->value.s, v->name);
1356 for (i = 0; i < n_labels; i++)
1357 free (labels[i].label);
1365 for (i = 0; i < n_labels; i++)
1366 free (labels[i].label);
1373 /* Reads BYTE_CNT bytes from the file represented by R. If BUF is
1374 non-NULL, uses that as the buffer; otherwise allocates at least
1375 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1378 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1382 if (buf == NULL && byte_cnt > 0 )
1383 buf = xmalloc (max (byte_cnt, min_alloc));
1385 if ( byte_cnt == 0 )
1389 if (1 != fread (buf, byte_cnt, 1, r->file))
1391 if (ferror (r->file))
1392 msg (ME, _("%s: Reading system file: %s."),
1393 fh_get_file_name (r->fh), strerror (errno));
1395 corrupt_msg (ME, _("%s: Unexpected end of file."),
1396 fh_get_file_name (r->fh));
1404 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1406 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1408 assert(byte_cnt > 0);
1410 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1412 msg (ME, _("%s: Seeking system file: %s."),
1413 fh_get_file_name (r->fh), strerror (errno));
1417 /* Reads a document record, type 6, from system file R, and sets up
1418 the documents and n_documents fields in the associated
1421 read_documents (struct sfm_reader *r, struct dictionary *dict)
1426 if (dict_get_documents (dict) != NULL)
1427 lose ((ME, _("%s: System file contains multiple "
1428 "type 6 (document) records."),
1429 fh_get_file_name (r->fh)));
1431 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1433 lose ((ME, _("%s: Number of document lines (%ld) "
1434 "must be greater than 0."),
1435 fh_get_file_name (r->fh), (long) line_cnt));
1437 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1438 /* FIXME? Run through asciify. */
1439 if (documents == NULL)
1441 documents[80 * line_cnt] = '\0';
1442 dict_set_documents (dict, documents);
1452 /* Reads compressed data into R->BUF and sets other pointers
1453 appropriately. Returns nonzero only if both no errors occur and
1456 buffer_input (struct sfm_reader *r)
1463 r->buf = xnmalloc (128, sizeof *r->buf);
1464 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1465 if (ferror (r->file))
1467 msg (ME, _("%s: Error reading file: %s."),
1468 fh_get_file_name (r->fh), strerror (errno));
1473 r->end = &r->buf[amt];
1477 /* Reads a single case consisting of compressed data from system
1478 file R into the array BUF[], and
1479 returns nonzero only if successful. */
1480 /* Data in system files is compressed in this manner. Data
1481 values are grouped into sets of eight ("octets"). Each value
1482 in an octet has one instruction byte that are output together.
1483 Each instruction byte gives a value for that byte or indicates
1484 that the value can be found following the instructions. */
1486 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1488 const unsigned char *p_end = r->x + sizeof (flt64);
1489 unsigned char *p = r->y;
1491 const flt64 *buf_beg = buf;
1492 const flt64 *buf_end = &buf[r->value_cnt];
1496 for (; p < p_end; p++){
1500 /* Code 0 is ignored. */
1503 /* Code 252 is end of file. */
1506 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1507 "in partial case."),
1508 fh_get_file_name (r->fh)));
1510 /* Code 253 indicates that the value is stored explicitly
1511 following the instruction bytes. */
1512 if (r->ptr == NULL || r->ptr >= r->end)
1513 if (!buffer_input (r))
1514 lose ((ME, _("%s: Unexpected end of file."),
1515 fh_get_file_name (r->fh)));
1516 memcpy (buf++, r->ptr++, sizeof *buf);
1521 /* Code 254 indicates a string that is all blanks. */
1522 memset (buf++, ' ', sizeof *buf);
1527 /* Code 255 indicates the system-missing value. */
1529 if (r->reverse_endian)
1536 /* Codes 1 through 251 inclusive are taken to indicate a
1537 value of (BYTE - BIAS), where BYTE is the byte's value
1538 and BIAS is the compression bias (generally 100.0). */
1539 *buf = *p - r->bias;
1540 if (r->reverse_endian)
1548 /* We have reached the end of this instruction octet. Read another. */
/* Refill the buffer when it has never been filled or has been
   completely consumed. */
1550 if (r->ptr == NULL || r->ptr >= r->end)
1552 if (!buffer_input (r))
1555 lose ((ME, _("%s: Unexpected end of file."),
1556 fh_get_file_name (r->fh)));
/* Copy the next buffer element into r->x, where it serves as the
   instruction bytes of the next octet, and step r->ptr past it. */
1561 memcpy (r->x, r->ptr++, sizeof *buf);
1568 /* We have filled up an entire record. Update state and return successfully. */
1579 /* Reads one case from the file of reader R into C. Returns nonzero
1580 only if successful. */
1582 sfm_read_case (struct sfm_reader *r, struct ccase *c)
/* The fast path is taken only for uncompressed files on hosts
   where flt64 and double have the same size. */
1587 if (!r->compressed && sizeof (flt64) == sizeof (double))
1589 /* Fast path: external and internal representations are the
1590 same, except possibly for endianness or SYSMIS. Read
1591 directly into the case's buffer, then fix up any minor
1592 details as needed. */
1593 if (!fread_ok (r, case_data_all_rw (c),
1594 sizeof (union value) * r->value_cnt))
1597 /* Fix up endianness if needed. */
1598 if (r->reverse_endian)
/* Only variables whose width is 0 are byte-swapped; presumably
   those are the numeric ones -- confirm against struct sfm_var. */
1602 for (i = 0; i < r->value_cnt; i++)
1603 if (r->vars[i].width == 0)
1604 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1607 /* Fix up SYSMIS values if needed.
1608 I don't think this will ever actually kick in, but it can't hurt. */
/* When the file uses a different system-missing constant than this
   host, replace it with SYSMIS in every width-0 variable.
   NOTE(review): the test reads the value with case_num (c, i) while
   the assignment addresses it through r->vars[i].fv; confirm that
   both refer to the same value. */
1610 if (r->sysmis != SYSMIS)
1614 for (i = 0; i < r->value_cnt; i++)
1615 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1616 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1621 /* Slow path: internal and external representations differ.
1622 Read into a bounce buffer, then copy to C. */
/* The bounce buffer has one element for each of the r->value_cnt
   values in a case. */
1629 bounce_size = sizeof *bounce * r->value_cnt;
1630 bounce = bounce_cur = local_alloc (bounce_size);
/* Fill the bounce buffer either straight from the stream or through
   read_compressed_data; presumably r->compressed selects between the
   two -- the selecting test is not among the lines shown. */
1633 read_ok = fread_ok (r, bounce, bounce_size);
1635 read_ok = read_compressed_data (r, bounce);
/* Presumably the failed-read path: the bounce buffer is released
   here as well as after the copy loop below. */
1638 local_free (bounce);
/* Copy each value out of the bounce buffer into C.  A value read as
   a single flt64 has the file's system-missing constant mapped to
   SYSMIS.  A string copies v->width bytes and consumes
   DIV_RND_UP (v->width, sizeof (flt64)) elements.  A variable whose
   width is -1 matches neither visible branch. */
1642 for (i = 0; i < r->value_cnt; i++)
1644 struct sfm_var *v = &r->vars[i];
1648 flt64 f = *bounce_cur++;
1649 if (r->reverse_endian)
1651 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1653 else if (v->width != -1)
1655 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1656 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1660 local_free (bounce);
/* Reads BYTE_CNT bytes from the file of reader R into BUFFER and
   reports failures through msg().  A stream error is reported with
   the errno text.  A short read that delivered some but not all of
   the bytes is reported as a partial record.  A read that delivered
   nothing without a stream error produces no message here.
   NOTE(review): the return statements are not among the lines shown;
   callers in this file treat a false result as failure. */
1666 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
/* An item size of 1 makes the fread result a count of bytes. */
1668 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
/* All of the requested bytes arrived. */
1670 if (read_bytes == byte_cnt)
1674 if (ferror (r->file))
1676 msg (ME, _("%s: Reading system file: %s."),
1677 fh_get_file_name (r->fh), strerror (errno));
1680 else if (read_bytes != 0)
1682 msg (ME, _("%s: Partial record at end of system file."),
1683 fh_get_file_name (r->fh));
1690 /* Returns true if an I/O error has occurred on READER, false otherwise. */
1693 sfm_read_error (const struct sfm_reader *reader)
1698 /* Returns true if FILE is an SPSS system file, false otherwise. */
1701 sfm_detect (FILE *file)
1703 struct sysfile_header hdr;
1705 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1707 if (strncmp ("$FL2", hdr.rec_type, 4))