1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 #include <libpspp/alloc.h>
29 #include <libpspp/message.h>
30 #include <libpspp/compiler.h>
31 #include <libpspp/magic.h>
32 #include <libpspp/misc.h>
33 #include <libpspp/str.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/array.h>
37 #include "sys-file-reader.h"
38 #include "sfm-private.h"
40 #include "dictionary.h"
41 #include "file-handle-def.h"
42 #include "file-name.h"
44 #include "value-labels.h"
49 #define _(msgid) gettext (msgid)
51 /* System file reader. */
54 struct file_handle *fh; /* File handle. */
55 FILE *file; /* File stream. */
57 int reverse_endian; /* 1=file has endianness opposite us. */
58 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
59 int value_cnt; /* Number of `union values's per case. */
60 long case_cnt; /* Number of cases, -1 if unknown. */
61 int compressed; /* 1=compressed, 0=not compressed. */
62 double bias; /* Compression bias, usually 100.0. */
63 int weight_idx; /* 0-based index of weighting variable, or -1. */
64 bool ok; /* False after an I/O error or corrupt data. */
65 bool has_vls; /* True if the file has one or more Very Long Strings*/
68 struct hsh_table *var_hash;
69 struct variable **svars;
71 /* File's special constants. */
76 /* Decompression buffer. */
77 flt64 *buf; /* Buffer data. */
78 flt64 *ptr; /* Current location in buffer. */
79 flt64 *end; /* End of buffer data. */
81 /* Compression instruction octet. */
82 unsigned char x[8]; /* Current instruction octet. */
83 unsigned char *y; /* Location in current instruction octet. */
86 /* A variable in a system file. */
89 char name[SHORT_NAME_LEN + 1]; /* name */
90 int width; /* 0=numeric, otherwise string width. */
91 int fv; /* Index into case. */
96 /* Swap bytes *A and *B. */
98 bswap (char *a, char *b)
105 /* Reverse the byte order of 32-bit integer *X. */
107 bswap_int32 (int32_t *x_)
109 char *x = (char *) x_;
110 bswap (x + 0, x + 3);
111 bswap (x + 1, x + 2);
114 /* Reverse the byte order of 64-bit floating point *X. */
116 bswap_flt64 (flt64 *x_)
118 char *x = (char *) x_;
119 bswap (x + 0, x + 7);
120 bswap (x + 1, x + 6);
121 bswap (x + 2, x + 5);
122 bswap (x + 3, x + 4);
126 corrupt_msg (int class, const char *format,...)
127 PRINTF_FORMAT (2, 3);
129 /* Displays a corrupt sysfile error. */
131 corrupt_msg (int class, const char *format,...)
137 ds_init_cstr (&text, _("corrupt system file: "));
138 va_start (args, format);
139 ds_put_vformat (&text, format, args);
142 m.category = msg_class_to_category (class);
143 m.severity = msg_class_to_severity (class);
144 m.where.file_name = NULL;
145 m.where.line_number = 0;
146 m.text = ds_cstr (&text);
151 /* Closes a system file after we're done with it. */
153 sfm_close_reader (struct sfm_reader *r)
160 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
161 msg (ME, _("%s: Closing system file: %s."),
162 fh_get_file_name (r->fh), strerror (errno));
167 fh_close (r->fh, "system file", "rs");
169 hsh_destroy(r->var_hash);
174 /* Dictionary reader. */
176 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
178 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
181 static int read_header (struct sfm_reader *,
182 struct dictionary *, struct sfm_read_info *);
183 static int parse_format_spec (struct sfm_reader *, int32_t,
184 struct fmt_spec *, const struct variable *);
185 static int read_value_labels (struct sfm_reader *, struct dictionary *,
186 struct variable **var_by_idx);
187 static int read_variables (struct sfm_reader *,
188 struct dictionary *, struct variable ***var_by_idx);
189 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
190 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
191 static int read_documents (struct sfm_reader *, struct dictionary *);
193 static int fread_ok (struct sfm_reader *, void *, size_t);
195 /* Displays the message X with corrupt_msg, then jumps to the error
203 /* Calls buf_read with the specified arguments, and jumps to
204 error if the read fails. */
205 #define assertive_buf_read(a,b,c,d) \
207 if (!buf_read (a,b,c,d)) \
219 pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED)
223 const struct name_pair *p1 = _p1;
224 const struct name_pair *p2 = _p2;
226 char buf1[SHORT_NAME_LEN + 1];
227 char buf2[SHORT_NAME_LEN + 1];
229 memset(buf1, 0, SHORT_NAME_LEN + 1);
230 memset(buf2, 0, SHORT_NAME_LEN + 1);
232 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
234 buf1[i] = p1->shortname[i];
235 if ( '\0' == buf1[i])
239 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
241 buf2[i] = p2->shortname[i];
242 if ( '\0' == buf2[i])
246 return strncmp(buf1, buf2, SHORT_NAME_LEN);
250 pair_sn_hash(const void *_p, void *aux UNUSED)
253 const struct name_pair *p = _p;
254 char buf[SHORT_NAME_LEN + 1];
256 memset(buf, 0, SHORT_NAME_LEN + 1);
257 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
259 buf[i] = p->shortname[i];
264 return hsh_hash_bytes(buf, strlen(buf));
268 pair_sn_free(void *p, void *aux UNUSED)
275 /* A hsh_compare_func that orders variables A and B by their
278 compare_var_shortnames (const void *a_, const void *b_, void *foo UNUSED)
281 const struct variable *a = a_;
282 const struct variable *b = b_;
284 char buf1[SHORT_NAME_LEN + 1];
285 char buf2[SHORT_NAME_LEN + 1];
287 memset(buf1, 0, SHORT_NAME_LEN + 1);
288 memset(buf2, 0, SHORT_NAME_LEN + 1);
290 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
292 buf1[i] = a->short_name[i];
293 if ( '\0' == buf1[i])
297 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
299 buf2[i] = b->short_name[i];
300 if ( '\0' == buf2[i])
304 return strncmp(buf1, buf2, SHORT_NAME_LEN);
307 /* A hsh_hash_func that hashes variable V based on its name. */
309 hash_var_shortname (const void *v_, void *foo UNUSED)
312 const struct variable *v = v_;
313 char buf[SHORT_NAME_LEN + 1];
315 memset(buf, 0, SHORT_NAME_LEN + 1);
316 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
318 buf[i] = v->short_name[i];
323 return hsh_hash_bytes(buf, strlen(buf));
328 /* Opens the system file designated by file handle FH for
329 reading. Reads the system file's dictionary into *DICT.
330 If INFO is non-null, then it receives additional info about the
333 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
334 struct sfm_read_info *info)
336 struct sfm_reader *r = NULL;
337 struct variable **var_by_idx = NULL;
339 /* The data in record 7(14) */
340 char *subrec14data = 0;
342 /* A hash table of long variable names indexed by short name */
343 struct hsh_table *short_to_long = NULL;
346 *dict = dict_create ();
347 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
350 /* Create and initialize reader. */
351 r = xmalloc (sizeof *r);
353 r->file = fn_open (fh_get_file_name (fh), "rb");
355 r->reverse_endian = 0;
366 r->var_hash = hsh_create(4, compare_var_shortnames, hash_var_shortname, 0, 0);
368 r->sysmis = -FLT64_MAX;
369 r->highest = FLT64_MAX;
370 r->lowest = second_lowest_flt64;
372 r->buf = r->ptr = r->end = NULL;
373 r->y = r->x + sizeof r->x;
375 /* Check that file open succeeded. */
378 msg (ME, _("An error occurred while opening \"%s\" for reading "
379 "as a system file: %s."),
380 fh_get_file_name (r->fh), strerror (errno));
384 /* Read header and variables. */
385 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
389 /* Handle weighting. */
390 if (r->weight_idx != -1)
392 struct variable *weight_var;
394 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
395 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
396 "and number of elements per case (%d)."),
397 fh_get_file_name (r->fh), r->weight_idx, r->value_cnt));
400 weight_var = var_by_idx[r->weight_idx];
402 if (weight_var == NULL)
404 _("%s: Weighting variable may not be a continuation of "
405 "a long string variable."), fh_get_file_name (fh)));
406 else if (weight_var->type == ALPHA)
407 lose ((ME, _("%s: Weighting variable may not be a string variable."),
408 fh_get_file_name (fh)));
410 dict_set_weight (*dict, weight_var);
413 dict_set_weight (*dict, NULL);
415 /* Read records of types 3, 4, 6, and 7. */
420 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
421 if (r->reverse_endian)
422 bswap_int32 (&rec_type);
428 if (!read_value_labels (r, *dict, var_by_idx))
433 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
434 "records must always immediately follow type 3 "
436 fh_get_file_name (r->fh)));
439 if (!read_documents (r, *dict))
456 assertive_buf_read (r, &data, sizeof data, 0);
457 if (r->reverse_endian)
459 bswap_int32 (&data.subtype);
460 bswap_int32 (&data.size);
461 bswap_int32 (&data.count);
463 bytes = data.size * data.count;
465 if (bytes < data.size || bytes < data.count)
466 lose ((ME, "%s: Record type %d subtype %d too large.",
467 fh_get_file_name (r->fh), rec_type, data.subtype));
469 switch (data.subtype)
472 if (!read_machine_int32_info (r, data.size, data.count))
477 if (!read_machine_flt64_info (r, data.size, data.count))
482 case 6: /* ?? Used by SPSS 8.0. */
486 case 11: /* Variable display parameters */
488 const int n_vars = data.count / 3 ;
490 if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) )
492 msg (MW, _("%s: Invalid subrecord length. "
493 "Record: 7; Subrecord: 11"),
494 fh_get_file_name (r->fh));
499 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
511 assertive_buf_read (r, ¶ms, sizeof(params), 0);
513 if ( ! measure_is_valid(params.measure)
515 ! alignment_is_valid(params.align))
518 _("%s: Invalid variable display parameters. Default parameters substituted."),
519 fh_get_file_name(r->fh));
523 v = dict_get_var(*dict, i);
525 v->measure = params.measure;
526 v->display_width = params.width;
527 v->alignment = params.align;
532 case 13: /* SPSS 12.0 Long variable name map */
534 char *short_name, *save_ptr;
540 subrec14data = xmalloc (bytes + 1);
541 if (!buf_read (r, subrec14data, bytes, 0))
545 subrec14data[bytes] = '\0';
547 short_to_long = hsh_create(4,
554 for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
556 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
558 struct name_pair *pair ;
559 char *long_name = strtok_r (NULL, "\t", &save_ptr);
562 /* Validate long name. */
563 if (long_name == NULL)
565 msg (MW, _("%s: Trailing garbage in long variable "
567 fh_get_file_name (r->fh));
570 if (!var_is_valid_name (long_name, false))
572 msg (MW, _("%s: Long variable mapping to invalid "
573 "variable name `%s'."),
574 fh_get_file_name (r->fh), long_name);
578 /* Find variable using short name. */
579 v = dict_lookup_var (*dict, short_name);
582 msg (MW, _("%s: Long variable mapping for "
583 "nonexistent variable %s."),
584 fh_get_file_name (r->fh), short_name);
588 /* Identify any duplicates. */
589 if ( compare_var_names(short_name, long_name, 0) &&
590 NULL != dict_lookup_var (*dict, long_name))
591 lose ((ME, _("%s: Duplicate long variable name `%s' "
592 "within system file."),
593 fh_get_file_name (r->fh), long_name));
597 Renaming a variable may clear the short
598 name, but we want to retain it, so
599 re-set it explicitly. */
600 dict_rename_var (*dict, v, long_name);
601 var_set_short_name (v, short_name);
603 pair = xmalloc(sizeof *pair);
604 pair->shortname = short_name;
605 pair->longname = long_name;
606 hsh_insert(short_to_long, pair);
608 /* This messes up the processing of subtype 14 (below).
609 I'm not sure if it is needed anyway, so I'm removing it for
610 now. If it's needed, then it will need to be done after all the
611 records have been processed. --- JMD 27 April 2006
614 /* For compatability, make sure dictionary
615 is in long variable name map order. In
616 the common case, this has no effect,
617 because the dictionary and the long
618 variable name map are already in the
620 dict_reorder_var (*dict, v, idx);
630 bool eq_seen = false;
634 char *buffer = xmalloc (bytes + 1);
635 if (!buf_read (r, buffer, bytes, 0))
640 buffer[bytes] = '\0';
643 /* Note: SPSS v13 terminates this record with 00,
644 whereas SPSS v14 terminates it with 00 09. We must
646 for(i = 0; i < bytes ; ++i)
649 static char name[SHORT_NAME_LEN + 1] = {0};
650 static char len_str[6] ={0};
659 length = strtol(len_str, 0, 10);
660 if ( length != LONG_MAX && length != LONG_MIN)
662 char *lookup_name = name;
669 struct name_pair pair;
672 pair.shortname = name;
673 p = hsh_find(short_to_long, &pair);
675 lookup_name = p->longname;
678 v = dict_lookup_var(*dict, lookup_name);
682 _("%s: No variable called %s but it is listed in length table."),
683 fh_get_file_name (r->fh), lookup_name);
690 if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
691 l -= EFFECTIVE_LONG_STRING_LENGTH;
698 struct variable *v_next;
699 v_next = dict_get_var(*dict, idx + 1);
701 if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
702 l -= EFFECTIVE_LONG_STRING_LENGTH;
706 hsh_delete(r->var_hash, v_next);
708 dict_delete_var(*dict, v_next);
711 assert ( length > MAX_LONG_STRING );
714 v->print.w = v->width;
715 v->write.w = v->width;
716 v->nv = DIV_RND_UP (length, MAX_SHORT_STRING);
719 memset(name, 0, SHORT_NAME_LEN+1);
720 memset(len_str, 0, 6);
727 len_str[j] = buffer[i];
735 dict_compact_values(*dict);
740 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
741 "encountered in system file."),
742 fh_get_file_name (r->fh), data.subtype);
748 void *x = buf_read (r, NULL, data.size * data.count, 0);
760 assertive_buf_read (r, &filler, sizeof filler, 0);
766 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
767 fh_get_file_name (r->fh), rec_type);
772 /* Come here on successful completion. */
775 hsh_destroy(short_to_long);
780 /* Come here on unsuccessful completion. */
781 sfm_close_reader (r);
783 hsh_destroy(short_to_long);
787 dict_destroy (*dict);
793 /* Read record type 7, subtype 3. */
795 read_machine_int32_info (struct sfm_reader *r, int size, int count)
802 if (size != sizeof (int32_t) || count != 8)
803 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
804 "subtype 3. Expected size %d, count 8."),
805 fh_get_file_name (r->fh), size, count, sizeof (int32_t)));
807 assertive_buf_read (r, data, sizeof data, 0);
808 if (r->reverse_endian)
809 for (i = 0; i < 8; i++)
810 bswap_int32 (&data[i]);
814 lose ((ME, _("%s: Floating-point representation in system file is not "
815 "IEEE-754. PSPP cannot convert between floating-point "
817 fh_get_file_name (r->fh)));
819 #error Add support for your floating-point format.
822 #ifdef WORDS_BIGENDIAN
827 if (r->reverse_endian)
829 if (file_bigendian ^ (data[6] == 1))
830 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
831 "endianness intuited from file header (%s)."),
832 fh_get_file_name (r->fh),
833 file_bigendian ? _("big-endian") : _("little-endian"),
834 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
837 /* PORTME: Character representation code. */
838 if (data[7] != 2 && data[7] != 3)
839 lose ((ME, _("%s: File-indicated character representation code (%s) is "
841 fh_get_file_name (r->fh),
842 (data[7] == 1 ? "EBCDIC"
843 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
851 /* Read record type 7, subtype 4. */
853 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
858 if (size != sizeof (flt64) || count != 3)
859 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
860 "subtype 4. Expected size %d, count 8."),
861 fh_get_file_name (r->fh), size, count, sizeof (flt64)));
863 assertive_buf_read (r, data, sizeof data, 0);
864 if (r->reverse_endian)
865 for (i = 0; i < 3; i++)
866 bswap_flt64 (&data[i]);
868 if (data[0] != SYSMIS || data[1] != FLT64_MAX
869 || data[2] != second_lowest_flt64)
872 r->highest = data[1];
874 msg (MW, _("%s: File-indicated value is different from internal value "
875 "for at least one of the three system values. SYSMIS: "
876 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
878 fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS,
879 (double) data[1], (double) FLT64_MAX,
880 (double) data[2], (double) second_lowest_flt64);
890 read_header (struct sfm_reader *r,
891 struct dictionary *dict, struct sfm_read_info *info)
893 struct sysfile_header hdr; /* Disk buffer. */
894 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
895 int skip_amt = 0; /* Amount of product name to omit. */
898 /* Read header, check magic. */
899 assertive_buf_read (r, &hdr, sizeof hdr, 0);
900 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
901 lose ((ME, _("%s: Bad magic. Proper system files begin with "
902 "the four characters `$FL2'. This file will not be read."),
903 fh_get_file_name (r->fh)));
905 /* Check eye-category.her string. */
906 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
907 for (i = 0; i < 60; i++)
908 if (!c_isprint ((unsigned char) prod_name[i]))
910 for (i = 59; i >= 0; i--)
911 if (!c_isgraph ((unsigned char) prod_name[i]))
916 prod_name[60] = '\0';
920 static const char *prefix[N_PREFIXES] =
922 "@(#) SPSS DATA FILE",
928 for (i = 0; i < N_PREFIXES; i++)
929 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
931 skip_amt = strlen (prefix[i]);
936 /* Check endianness. */
937 if (hdr.layout_code == 2)
938 r->reverse_endian = 0;
941 bswap_int32 (&hdr.layout_code);
942 if (hdr.layout_code != 2)
943 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
944 "should be 2, in big-endian or little-endian format."),
945 fh_get_file_name (r->fh), hdr.layout_code));
947 r->reverse_endian = 1;
948 bswap_int32 (&hdr.nominal_case_size);
949 bswap_int32 (&hdr.compress);
950 bswap_int32 (&hdr.weight_idx);
951 bswap_int32 (&hdr.case_cnt);
952 bswap_flt64 (&hdr.bias);
956 /* Copy basic info and verify correctness. */
957 r->value_cnt = hdr.nominal_case_size;
959 /* If value count is ridiculous, then force it to -1 (a
961 if ( r->value_cnt < 0 ||
962 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
965 r->compressed = hdr.compress;
967 r->weight_idx = hdr.weight_idx - 1;
969 r->case_cnt = hdr.case_cnt;
970 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
972 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
973 fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2));
976 if (r->bias != 100.0)
977 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
979 fh_get_file_name (r->fh), r->bias);
981 /* Make a file label only on the condition that the given label is
982 not all spaces or nulls. */
986 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
988 if (!c_isspace ((unsigned char) hdr.file_label[i])
989 && hdr.file_label[i] != 0)
991 char *label = xmalloc (i + 2);
992 memcpy (label, hdr.file_label, i + 1);
994 dict_set_label (dict, label);
1005 memcpy (info->creation_date, hdr.creation_date, 9);
1006 info->creation_date[9] = 0;
1008 memcpy (info->creation_time, hdr.creation_time, 8);
1009 info->creation_time[8] = 0;
1011 #ifdef WORDS_BIGENDIAN
1012 info->big_endian = !r->reverse_endian;
1014 info->big_endian = r->reverse_endian;
1017 info->compressed = hdr.compress;
1019 info->case_cnt = hdr.case_cnt;
1021 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
1022 if (c_isgraph ((unsigned char) *cp))
1024 strcpy (info->product, cp);
1033 /* Reads most of the dictionary from file H; also fills in the
1034 associated VAR_BY_IDX array. */
1036 read_variables (struct sfm_reader *r,
1037 struct dictionary *dict, struct variable ***var_by_idx)
1041 struct sysfile_variable sv; /* Disk buffer. */
1042 int long_string_count = 0; /* # of long string continuation
1043 records still expected. */
1044 int next_value = 0; /* Index to next `value' structure. */
1051 /* Read in the entry for each variable and use the info to
1052 initialize the dictionary. */
1055 struct variable *vv;
1056 char name[SHORT_NAME_LEN + 1];
1060 assertive_buf_read (r, &sv, sizeof sv, 0);
1062 if (r->reverse_endian)
1064 bswap_int32 (&sv.rec_type);
1065 bswap_int32 (&sv.type);
1066 bswap_int32 (&sv.has_var_label);
1067 bswap_int32 (&sv.n_missing_values);
1068 bswap_int32 (&sv.print);
1069 bswap_int32 (&sv.write);
1072 /* We've come to the end of the variable entries */
1073 if (sv.rec_type != 2)
1075 buf_unread(r, sizeof sv);
1080 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
1082 /* If there was a long string previously, make sure that the
1083 continuations are present; otherwise make sure there aren't
1085 if (long_string_count)
1088 lose ((ME, _("%s: position %d: String variable does not have "
1089 "proper number of continuation records."),
1090 fh_get_file_name (r->fh), i));
1093 (*var_by_idx)[i] = NULL;
1094 long_string_count--;
1097 else if (sv.type == -1)
1098 lose ((ME, _("%s: position %d: Superfluous long string continuation "
1100 fh_get_file_name (r->fh), i));
1102 /* Check fields for validity. */
1103 if (sv.type < 0 || sv.type > 255)
1104 lose ((ME, _("%s: position %d: Bad variable type code %d."),
1105 fh_get_file_name (r->fh), i, sv.type));
1106 if (sv.has_var_label != 0 && sv.has_var_label != 1)
1107 lose ((ME, _("%s: position %d: Variable label indicator field is not "
1108 "0 or 1."), fh_get_file_name (r->fh), i));
1109 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
1110 || sv.n_missing_values == -1)
1111 lose ((ME, _("%s: position %d: Missing value indicator field is not "
1112 "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i));
1114 /* Copy first character of variable name. */
1115 if (sv.name[0] == '@' || sv.name[0] == '#')
1116 lose ((ME, _("%s: position %d: Variable name begins with invalid "
1118 fh_get_file_name (r->fh), i));
1120 name[0] = sv.name[0];
1122 /* Copy remaining characters of variable name. */
1123 for (j = 1; j < SHORT_NAME_LEN; j++)
1125 int c = (unsigned char) sv.name[j];
1134 if ( ! var_is_plausible_name(name, false) )
1135 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
1136 fh_get_file_name (r->fh), name));
1138 /* Create variable. */
1139 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
1141 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
1142 fh_get_file_name (r->fh), name));
1144 /* Set the short name the same as the long name */
1145 var_set_short_name (vv, vv->name);
1147 /* Case reading data. */
1148 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
1149 long_string_count = nv - 1;
1152 /* Get variable label, if any. */
1153 if (sv.has_var_label == 1)
1158 /* Read length of label. */
1159 assertive_buf_read (r, &len, sizeof len, 0);
1160 if (r->reverse_endian)
1164 if (len < 0 || len > 255)
1165 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
1167 fh_get_file_name (r->fh), vv->name, len));
1171 /* Read label into variable structure. */
1172 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
1173 if (vv->label == NULL)
1175 vv->label[len] = '\0';
1179 /* Set missing values. */
1180 if (sv.n_missing_values != 0)
1183 int mv_cnt = abs (sv.n_missing_values);
1185 if (vv->width > MAX_SHORT_STRING)
1186 lose ((ME, _("%s: Long string variable %s may not have missing "
1188 fh_get_file_name (r->fh), vv->name));
1190 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
1192 if (r->reverse_endian && vv->type == NUMERIC)
1193 for (j = 0; j < mv_cnt; j++)
1194 bswap_flt64 (&mv[j]);
1196 if (sv.n_missing_values > 0)
1198 for (j = 0; j < sv.n_missing_values; j++)
1199 if (vv->type == NUMERIC)
1200 mv_add_num (&vv->miss, mv[j]);
1202 mv_add_str (&vv->miss, (char *) &mv[j]);
1206 if (vv->type == ALPHA)
1207 lose ((ME, _("%s: String variable %s may not have missing "
1208 "values specified as a range."),
1209 fh_get_file_name (r->fh), vv->name));
1211 if (mv[0] == r->lowest)
1212 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
1213 else if (mv[1] == r->highest)
1214 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
1216 mv_add_num_range (&vv->miss, mv[0], mv[1]);
1218 if (sv.n_missing_values == -3)
1219 mv_add_num (&vv->miss, mv[2]);
1223 if (!parse_format_spec (r, sv.print, &vv->print, vv)
1224 || !parse_format_spec (r, sv.write, &vv->write, vv))
1227 if ( vv->width != -1)
1228 hsh_insert(r->var_hash, vv);
1231 /* Some consistency checks. */
1232 if (long_string_count != 0)
1233 lose ((ME, _("%s: Long string continuation records omitted at end of "
1235 fh_get_file_name (r->fh)));
1237 if (next_value != r->value_cnt)
1238 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
1239 "%d were read from file."),
1240 fh_get_file_name (r->fh), r->value_cnt, next_value);
1249 /* Translates the format spec from sysfile format to internal
1252 parse_format_spec (struct sfm_reader *r, int32_t s,
1253 struct fmt_spec *f, const struct variable *v)
1255 f->type = translate_fmt ((s >> 16) & 0xff);
1257 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1258 fh_get_file_name (r->fh), (s >> 16) & 0xff));
1259 f->w = (s >> 8) & 0xff;
1262 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1263 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1264 fh_get_file_name (r->fh),
1265 v->type == ALPHA ? _("String") : _("Numeric"),
1267 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1268 formats[f->type].name));
1270 if (!check_output_specifier (f, false)
1271 || !check_specifier_width (f, v->width, false))
1273 msg (ME, _("%s variable %s has invalid format specifier %s."),
1274 v->type == NUMERIC ? _("Numeric") : _("String"),
1275 v->name, fmt_to_string (f));
1276 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1284 /* Reads value labels from sysfile H and inserts them into the
1285 associated dictionary. */
1287 read_value_labels (struct sfm_reader *r,
1288 struct dictionary *dict, struct variable **var_by_idx)
1292 char raw_value[8]; /* Value as uninterpreted bytes. */
1293 union value value; /* Value. */
1294 char *label; /* Null-terminated label string. */
1297 struct label *labels = NULL;
1298 int32_t n_labels; /* Number of labels. */
1300 struct variable **var = NULL; /* Associated variables. */
1301 int32_t n_vars; /* Number of associated variables. */
1305 /* First step: read the contents of the type 3 record and record its
1306 contents. Note that we can't do much with the data since we
1307 don't know yet whether it is of numeric or string type. */
1309 /* Read number of labels. */
1310 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1311 if (r->reverse_endian)
1312 bswap_int32 (&n_labels);
1314 if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
1316 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1317 fh_get_file_name (r->fh), n_labels);
1321 /* Allocate memory. */
1322 labels = xcalloc (n_labels, sizeof *labels);
1323 for (i = 0; i < n_labels; i++)
1324 labels[i].label = NULL;
1326 /* Read each value/label tuple into labels[]. */
1327 for (i = 0; i < n_labels; i++)
1329 struct label *label = labels + i;
1330 unsigned char label_len;
1334 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1336 /* Read label length. */
1337 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1338 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1340 /* Read label, padding. */
1341 label->label = xmalloc (padded_len + 1);
1342 assertive_buf_read (r, label->label, padded_len - 1, 0);
1343 label->label[label_len] = 0;
1346 /* Second step: Read the type 4 record that has the list of
1347 variables to which the value labels are to be applied. */
1349 /* Read record type of type 4 record. */
1353 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1354 if (r->reverse_endian)
1355 bswap_int32 (&rec_type);
1358 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1359 "follow value label record (type 3) as it should."),
1360 fh_get_file_name (r->fh)));
1363 /* Read number of variables associated with value label from type 4
1365 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1366 if (r->reverse_endian)
1367 bswap_int32 (&n_vars);
1368 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1369 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1370 "is not between 1 and the number of variables (%d)."),
1371 fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict)));
1373 /* Read the list of variables. */
1374 var = xnmalloc (n_vars, sizeof *var);
1375 for (i = 0; i < n_vars; i++)
1380 /* Read variable index, check range. */
1381 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1382 if (r->reverse_endian)
1383 bswap_int32 (&var_idx);
1384 if (var_idx < 1 || var_idx > r->value_cnt)
1385 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1386 "not between 1 and the number of values (%d)."),
1387 fh_get_file_name (r->fh), var_idx, r->value_cnt));
1389 /* Make sure it's a real variable. */
1390 v = var_by_idx[var_idx - 1];
1392 lose ((ME, _("%s: Variable index associated with value label (%d) "
1393 "refers to a continuation of a string variable, not to "
1394 "an actual variable."),
1395 fh_get_file_name (r->fh), var_idx));
1396 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1397 lose ((ME, _("%s: Value labels are not allowed on long string "
1399 fh_get_file_name (r->fh), v->name));
1401 /* Add it to the list of variables. */
1405 /* Type check the variables. */
1406 for (i = 1; i < n_vars; i++)
1407 if (var[i]->type != var[0]->type)
1408 lose ((ME, _("%s: Variables associated with value label are not all of "
1409 "identical type. Variable %s has %s type, but variable "
1411 fh_get_file_name (r->fh),
1412 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1413 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1415 /* Fill in labels[].value, now that we know the desired type. */
1416 for (i = 0; i < n_labels; i++)
1418 struct label *label = labels + i;
1420 if (var[0]->type == ALPHA)
1422 const int copy_len = min (sizeof label->raw_value,
1423 sizeof label->label);
1424 memcpy (label->value.s, label->raw_value, copy_len);
1427 assert (sizeof f == sizeof label->raw_value);
1428 memcpy (&f, label->raw_value, sizeof f);
1429 if (r->reverse_endian)
1435 /* Assign the value_label's to each variable. */
1436 for (i = 0; i < n_vars; i++)
1438 struct variable *v = var[i];
1441 /* Add each label to the variable. */
1442 for (j = 0; j < n_labels; j++)
1444 struct label *label = labels + j;
1445 if (!val_labs_replace (v->val_labs, label->value, label->label))
1448 if (var[0]->type == NUMERIC)
1449 msg (MW, _("%s: File contains duplicate label for value %g for "
1451 fh_get_file_name (r->fh), label->value.f, v->name);
1453 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1454 "for variable %s."),
1455 fh_get_file_name (r->fh), v->width, label->value.s, v->name);
1459 for (i = 0; i < n_labels; i++)
1460 free (labels[i].label);
1468 for (i = 0; i < n_labels; i++)
1469 free (labels[i].label);
1476 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1477 non-NULL, uses that as the buffer; otherwise allocates at least
1478 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1481 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1485 if (buf == NULL && byte_cnt > 0 )
1486 buf = xmalloc (max (byte_cnt, min_alloc));
1488 if ( byte_cnt == 0 )
1492 if (1 != fread (buf, byte_cnt, 1, r->file))
1494 if (ferror (r->file))
1495 msg (ME, _("%s: Reading system file: %s."),
1496 fh_get_file_name (r->fh), strerror (errno));
1498 corrupt_msg (ME, _("%s: Unexpected end of file."),
1499 fh_get_file_name (r->fh));
1507 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1509 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1511 assert(byte_cnt > 0);
1513 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1515 msg (ME, _("%s: Seeking system file: %s."),
1516 fh_get_file_name (r->fh), strerror (errno));
1520 /* Reads a document record, type 6, from system file R, and sets up
1521 the documents and n_documents fields in the associated
1524 read_documents (struct sfm_reader *r, struct dictionary *dict)
1529 if (dict_get_documents (dict) != NULL)
1530 lose ((ME, _("%s: System file contains multiple "
1531 "type 6 (document) records."),
1532 fh_get_file_name (r->fh)));
1534 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1536 lose ((ME, _("%s: Number of document lines (%ld) "
1537 "must be greater than 0."),
1538 fh_get_file_name (r->fh), (long) line_cnt));
1540 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1541 /* FIXME? Run through asciify. */
1542 if (documents == NULL)
1544 documents[80 * line_cnt] = '\0';
1545 dict_set_documents (dict, documents);
1555 /* Reads compressed data into H->BUF and sets other pointers
1556 appropriately. Returns nonzero only if both no errors occur and
1559 buffer_input (struct sfm_reader *r)
1566 r->buf = xnmalloc (128, sizeof *r->buf);
1567 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1568 if (ferror (r->file))
1570 msg (ME, _("%s: Error reading file: %s."),
1571 fh_get_file_name (r->fh), strerror (errno));
1576 r->end = &r->buf[amt];
1580 /* Reads a single case consisting of compressed data from system
1581 file H into the array BUF[] according to reader R, and
1582 returns nonzero only if successful. */
1583 /* Data in system files is compressed in this manner. Data
1584 values are grouped into sets of eight ("octets"). Each value
1585 in an octet has one instruction byte that are output together.
1586 Each instruction byte gives a value for that byte or indicates
1587 that the value can be found following the instructions. */
1589 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1591 const unsigned char *p_end = r->x + sizeof (flt64);
1592 unsigned char *p = r->y;
1594 const flt64 *buf_beg = buf;
1595 const flt64 *buf_end = &buf[r->value_cnt];
1599 for (; p < p_end; p++){
1603 /* Code 0 is ignored. */
1606 /* Code 252 is end of file. */
1609 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1610 "in partial case."),
1611 fh_get_file_name (r->fh)));
1613 /* Code 253 indicates that the value is stored explicitly
1614 following the instruction bytes. */
1615 if (r->ptr == NULL || r->ptr >= r->end)
1616 if (!buffer_input (r))
1617 lose ((ME, _("%s: Unexpected end of file."),
1618 fh_get_file_name (r->fh)));
1619 memcpy (buf++, r->ptr++, sizeof *buf);
1624 /* Code 254 indicates a string that is all blanks. */
1625 memset (buf++, ' ', sizeof *buf);
1630 /* Code 255 indicates the system-missing value. */
1632 if (r->reverse_endian)
1639 /* Codes 1 through 251 inclusive are taken to indicate a
1640 value of (BYTE - BIAS), where BYTE is the byte's value
1641 and BIAS is the compression bias (generally 100.0). */
1642 *buf = *p - r->bias;
1643 if (r->reverse_endian)
1651 /* We have reached the end of this instruction octet. Read
1653 if (r->ptr == NULL || r->ptr >= r->end)
1655 if (!buffer_input (r))
1658 lose ((ME, _("%s: Unexpected end of file."),
1659 fh_get_file_name (r->fh)));
1664 memcpy (r->x, r->ptr++, sizeof *buf);
1671 /* We have filled up an entire record. Update state and return
1684 compare_var_index(const void *_v1, const void *_v2, void *aux UNUSED)
1686 const struct variable *const *v1 = _v1;
1687 const struct variable *const *v2 = _v2;
1689 if ( (*v1)->index < (*v2)->index)
1692 return ( (*v1)->index > (*v2)->index) ;
1696 /* Reads one case from READER's file into C. Returns nonzero
1697 only if successful. */
1699 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1706 r->svars = (struct variable **) hsh_data(r->var_hash);
1707 sort(r->svars, hsh_count(r->var_hash),
1708 sizeof(*r->svars), compare_var_index, 0);
1711 if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls)
1713 /* Fast path: external and internal representations are the
1714 same, except possibly for endianness or SYSMIS. Read
1715 directly into the case's buffer, then fix up any minor
1716 details as needed. */
1717 if (!fread_ok (r, case_data_all_rw (c),
1718 sizeof (union value) * r->value_cnt))
1721 /* Fix up endianness if needed. */
1722 if (r->reverse_endian)
1726 for (i = 0; i < hsh_count(r->var_hash); i++)
1728 struct variable *v = r->svars[i];
1730 bswap_flt64 (&case_data_rw (c, v->fv)->f);
1734 /* Fix up SYSMIS values if needed.
1735 I don't think this will ever actually kick in, but it
1737 if (r->sysmis != SYSMIS)
1740 for (i = 0; i < hsh_count(r->var_hash); i++)
1742 struct variable *v = r->svars[i];
1743 if (v->width == 0 && case_num (c, i) == r->sysmis)
1744 case_data_rw (c, v->fv)->f = SYSMIS;
1750 /* Slow path: internal and external representations differ.
1751 Read into a bounce buffer, then copy to C. */
1758 bounce_size = sizeof *bounce * r->value_cnt;
1759 bounce = bounce_cur = local_alloc (bounce_size);
1761 memset(bounce, 0, bounce_size);
1764 read_ok = fread_ok (r, bounce, bounce_size);
1766 read_ok = read_compressed_data (r, bounce);
1769 local_free (bounce);
1773 for (i = 0; i < hsh_count(r->var_hash); i++)
1775 struct variable *tv = r->svars[i];
1779 flt64 f = *bounce_cur++;
1780 if (r->reverse_endian)
1782 case_data_rw (c, tv->fv)->f = f == r->sysmis ? SYSMIS : f;
1784 else if (tv->width != -1)
1786 flt64 *bc_start = bounce_cur;
1788 while (ofs < tv->width )
1790 const int chunk = MIN (MAX_LONG_STRING, tv->width - ofs);
1791 memcpy (case_data_rw (c, tv->fv)->s + ofs, bounce_cur, chunk);
1793 bounce_cur += DIV_RND_UP (chunk, sizeof (flt64));
1797 bounce_cur = bc_start + width_to_bytes(tv->width) / sizeof(flt64);
1801 local_free (bounce);
1807 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1809 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1811 if (read_bytes == byte_cnt)
1815 if (ferror (r->file))
1817 msg (ME, _("%s: Reading system file: %s."),
1818 fh_get_file_name (r->fh), strerror (errno));
1821 else if (read_bytes != 0)
1823 msg (ME, _("%s: Partial record at end of system file."),
1824 fh_get_file_name (r->fh));
1831 /* Returns true if an I/O error has occurred on READER, false
1834 sfm_read_error (const struct sfm_reader *reader)
1839 /* Returns true if FILE is an SPSS system file,
1842 sfm_detect (FILE *file)
1844 struct sysfile_header hdr;
1846 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1848 if (strncmp ("$FL2", hdr.rec_type, 4))