1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 #include <libpspp/alloc.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/message.h>
31 #include <libpspp/compiler.h>
32 #include <libpspp/magic.h>
33 #include <libpspp/misc.h>
34 #include <libpspp/str.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/array.h>
38 #include "sys-file-reader.h"
39 #include "sfm-private.h"
41 #include "dictionary.h"
42 #include "file-handle-def.h"
43 #include "file-name.h"
45 #include "value-labels.h"
50 #define _(msgid) gettext (msgid)
52 /* System file reader. */
55 struct file_handle *fh; /* File handle. */
56 FILE *file; /* File stream. */
58 int reverse_endian; /* 1=file has endianness opposite us. */
59 int value_cnt; /* Number of `union values's per case. */
60 long case_cnt; /* Number of cases, -1 if unknown. */
61 int compressed; /* 1=compressed, 0=not compressed. */
62 double bias; /* Compression bias, usually 100.0. */
63 int weight_idx; /* 0-based index of weighting variable, or -1. */
64 bool ok; /* False after an I/O error or corrupt data. */
65 bool has_vls; /* True if the file has one or more Very Long Strings*/
71 /* File's special constants. */
76 /* Decompression buffer. */
77 flt64 *buf; /* Buffer data. */
78 flt64 *ptr; /* Current location in buffer. */
79 flt64 *end; /* End of buffer data. */
81 /* Compression instruction octet. */
82 unsigned char x[8]; /* Current instruction octet. */
83 unsigned char *y; /* Location in current instruction octet. */
86 /* A variable in a system file. */
89 int width; /* 0=numeric, otherwise string width. */
90 int fv; /* Index into case. */
95 /* Swap bytes *A and *B. */
97 bswap (char *a, char *b)
104 /* Reverse the byte order of 32-bit integer *X. */
106 bswap_int32 (int32_t *x_)
108 char *x = (char *) x_;
109 bswap (x + 0, x + 3);
110 bswap (x + 1, x + 2);
113 /* Reverse the byte order of 64-bit floating point *X. */
115 bswap_flt64 (flt64 *x_)
117 char *x = (char *) x_;
118 bswap (x + 0, x + 7);
119 bswap (x + 1, x + 6);
120 bswap (x + 2, x + 5);
121 bswap (x + 3, x + 4);
125 corrupt_msg (int class, const char *format,...)
126 PRINTF_FORMAT (2, 3);
128 /* Displays a corrupt sysfile error. */
130 corrupt_msg (int class, const char *format,...)
136 ds_init_cstr (&text, _("corrupt system file: "));
137 va_start (args, format);
138 ds_put_vformat (&text, format, args);
141 m.category = msg_class_to_category (class);
142 m.severity = msg_class_to_severity (class);
143 m.where.file_name = NULL;
144 m.where.line_number = 0;
145 m.text = ds_cstr (&text);
150 /* Closes a system file after we're done with it. */
152 sfm_close_reader (struct sfm_reader *r)
159 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
160 msg (ME, _("%s: Closing system file: %s."),
161 fh_get_file_name (r->fh), strerror (errno));
166 fh_close (r->fh, "system file", "rs");
173 /* Dictionary reader. */
175 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
177 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
180 static int read_header (struct sfm_reader *,
181 struct dictionary *, struct sfm_read_info *);
182 static int parse_format_spec (struct sfm_reader *, int32_t,
183 struct fmt_spec *, const struct variable *);
184 static int read_value_labels (struct sfm_reader *, struct dictionary *,
185 struct variable **var_by_idx);
186 static int read_variables (struct sfm_reader *,
187 struct dictionary *, struct variable ***var_by_idx);
188 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
189 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
190 static int read_documents (struct sfm_reader *, struct dictionary *);
192 static int fread_ok (struct sfm_reader *, void *, size_t);
194 /* Displays the message X with corrupt_msg, then jumps to the error
202 /* Calls buf_read with the specified arguments, and jumps to
203 error if the read fails. */
204 #define assertive_buf_read(a,b,c,d) \
206 if (!buf_read (a,b,c,d)) \
218 pair_sn_compare(const void *_p1, const void *_p2, const void *aux UNUSED)
222 const struct name_pair *p1 = _p1;
223 const struct name_pair *p2 = _p2;
225 char buf1[SHORT_NAME_LEN + 1];
226 char buf2[SHORT_NAME_LEN + 1];
228 memset(buf1, 0, SHORT_NAME_LEN + 1);
229 memset(buf2, 0, SHORT_NAME_LEN + 1);
231 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
233 buf1[i] = p1->shortname[i];
234 if ( '\0' == buf1[i])
238 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
240 buf2[i] = p2->shortname[i];
241 if ( '\0' == buf2[i])
245 return strncmp(buf1, buf2, SHORT_NAME_LEN);
249 pair_sn_hash(const void *_p, const void *aux UNUSED)
252 const struct name_pair *p = _p;
253 char buf[SHORT_NAME_LEN + 1];
255 memset(buf, 0, SHORT_NAME_LEN + 1);
256 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
258 buf[i] = p->shortname[i];
263 return hsh_hash_bytes(buf, strlen(buf));
267 pair_sn_free(void *p, const void *aux UNUSED)
274 /* Opens the system file designated by file handle FH for
275 reading. Reads the system file's dictionary into *DICT.
276 If INFO is non-null, then it receives additional info about the
279 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
280 struct sfm_read_info *info)
282 struct sfm_reader *r = NULL;
283 struct variable **var_by_idx = NULL;
285 /* The data in record 7(14) */
286 char *subrec14data = 0;
288 /* A hash table of long variable names indexed by short name */
289 struct hsh_table *short_to_long = NULL;
291 *dict = dict_create ();
292 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
295 /* Create and initialize reader. */
296 r = xmalloc (sizeof *r);
298 r->file = fn_open (fh_get_file_name (fh), "rb");
300 r->reverse_endian = 0;
311 r->sysmis = -FLT64_MAX;
312 r->highest = FLT64_MAX;
313 r->lowest = second_lowest_flt64;
315 r->buf = r->ptr = r->end = NULL;
316 r->y = r->x + sizeof r->x;
318 /* Check that file open succeeded. */
321 msg (ME, _("An error occurred while opening \"%s\" for reading "
322 "as a system file: %s."),
323 fh_get_file_name (r->fh), strerror (errno));
327 /* Read header and variables. */
328 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
332 /* Handle weighting. */
333 if (r->weight_idx != -1)
335 struct variable *weight_var;
337 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
338 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
339 "and number of elements per case (%d)."),
340 fh_get_file_name (r->fh), r->weight_idx, r->value_cnt));
343 weight_var = var_by_idx[r->weight_idx];
345 if (weight_var == NULL)
347 _("%s: Weighting variable may not be a continuation of "
348 "a long string variable."), fh_get_file_name (fh)));
349 else if (weight_var->type == ALPHA)
350 lose ((ME, _("%s: Weighting variable may not be a string variable."),
351 fh_get_file_name (fh)));
353 dict_set_weight (*dict, weight_var);
356 dict_set_weight (*dict, NULL);
358 /* Read records of types 3, 4, 6, and 7. */
363 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
364 if (r->reverse_endian)
365 bswap_int32 (&rec_type);
371 if (!read_value_labels (r, *dict, var_by_idx))
376 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
377 "records must always immediately follow type 3 "
379 fh_get_file_name (r->fh)));
382 if (!read_documents (r, *dict))
393 } ATTRIBUTE((packed))
399 assertive_buf_read (r, &data, sizeof data, 0);
400 if (r->reverse_endian)
402 bswap_int32 (&data.subtype);
403 bswap_int32 (&data.size);
404 bswap_int32 (&data.count);
406 bytes = data.size * data.count;
408 if (bytes < data.size || bytes < data.count)
409 lose ((ME, "%s: Record type %d subtype %d too large.",
410 fh_get_file_name (r->fh), rec_type, data.subtype));
412 switch (data.subtype)
415 if (!read_machine_int32_info (r, data.size, data.count))
420 if (!read_machine_flt64_info (r, data.size, data.count))
425 case 6: /* ?? Used by SPSS 8.0. */
429 case 11: /* Variable display parameters */
431 const int n_vars = data.count / 3 ;
433 if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) )
435 msg (MW, _("%s: Invalid subrecord length. "
436 "Record: 7; Subrecord: 11"),
437 fh_get_file_name (r->fh));
442 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
449 } ATTRIBUTE((packed))
454 assertive_buf_read (r, ¶ms, sizeof(params), 0);
456 if ( ! measure_is_valid(params.measure)
458 ! alignment_is_valid(params.align))
461 _("%s: Invalid variable display parameters. Default parameters substituted."),
462 fh_get_file_name(r->fh));
466 v = dict_get_var(*dict, i);
468 v->measure = params.measure;
469 v->display_width = params.width;
470 v->alignment = params.align;
475 case 13: /* SPSS 12.0 Long variable name map */
478 char *save_ptr = NULL;
482 subrec14data = xmalloc (bytes + 1);
483 if (!buf_read (r, subrec14data, bytes, 0))
487 subrec14data[bytes] = '\0';
489 short_to_long = hsh_create(4,
496 for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
498 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
500 struct name_pair *pair ;
501 char *long_name = strtok_r (NULL, "\t", &save_ptr);
504 /* Validate long name. */
505 if (long_name == NULL)
507 msg (MW, _("%s: Trailing garbage in long variable "
509 fh_get_file_name (r->fh));
512 if (!var_is_valid_name (long_name, false))
514 msg (MW, _("%s: Long variable mapping to invalid "
515 "variable name `%s'."),
516 fh_get_file_name (r->fh), long_name);
520 /* Find variable using short name. */
521 v = dict_lookup_var (*dict, short_name);
524 msg (MW, _("%s: Long variable mapping for "
525 "nonexistent variable %s."),
526 fh_get_file_name (r->fh), short_name);
530 /* Identify any duplicates. */
531 if ( compare_var_names(short_name, long_name, 0) &&
532 NULL != dict_lookup_var (*dict, long_name))
533 lose ((ME, _("%s: Duplicate long variable name `%s' "
534 "within system file."),
535 fh_get_file_name (r->fh), long_name));
539 Renaming a variable may clear the short
540 name, but we want to retain it, so
541 re-set it explicitly. */
542 dict_rename_var (*dict, v, long_name);
543 var_set_short_name (v, short_name);
545 pair = xmalloc(sizeof *pair);
546 pair->shortname = short_name;
547 pair->longname = long_name;
548 hsh_insert(short_to_long, pair);
550 /* This messes up the processing of subtype 14 (below).
551 I'm not sure if it is needed anyway, so I'm removing it for
552 now. If it's needed, then it will need to be done after all the
553 records have been processed. --- JMD 27 April 2006
556 /* For compatibility, make sure dictionary
557 is in long variable name map order. In
558 the common case, this has no effect,
559 because the dictionary and the long
560 variable name map are already in the
562 dict_reorder_var (*dict, v, idx);
572 bool eq_seen = false;
576 char *buffer = xmalloc (bytes + 1);
577 if (!buf_read (r, buffer, bytes, 0))
582 buffer[bytes] = '\0';
586 /* Note: SPSS v13 terminates this record with 00,
587 whereas SPSS v14 terminates it with 00 09. We must
589 for(i = 0; i < bytes ; ++i)
592 static char name[SHORT_NAME_LEN + 1] = {0};
593 static char len_str[6] ={0};
602 length = strtol(len_str, 0, 10);
603 if ( length != LONG_MAX && length != LONG_MIN)
605 char *lookup_name = name;
612 struct name_pair pair;
615 pair.shortname = name;
616 p = hsh_find(short_to_long, &pair);
618 lookup_name = p->longname;
621 v = dict_lookup_var(*dict, lookup_name);
625 _("%s: No variable called %s but it is listed in length table."),
626 fh_get_file_name (r->fh), lookup_name);
633 if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
634 l -= EFFECTIVE_LONG_STRING_LENGTH;
641 struct variable *v_next;
642 v_next = dict_get_var(*dict, idx + 1);
644 if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
645 l -= EFFECTIVE_LONG_STRING_LENGTH;
649 dict_delete_var(*dict, v_next);
652 assert ( length > MAX_LONG_STRING );
655 v->print.w = v->width;
656 v->write.w = v->width;
657 v->nv = DIV_RND_UP (length, MAX_SHORT_STRING);
660 memset(name, 0, SHORT_NAME_LEN+1);
661 memset(len_str, 0, 6);
668 len_str[j] = buffer[i];
676 dict_compact_values(*dict);
681 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
682 "encountered in system file."),
683 fh_get_file_name (r->fh), data.subtype);
689 void *x = buf_read (r, NULL, data.size * data.count, 0);
701 assertive_buf_read (r, &filler, sizeof filler, 0);
707 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
708 fh_get_file_name (r->fh), rec_type);
713 /* Come here on successful completion. */
715 /* Create an index of dictionary variable widths for
716 sfm_read_case to use. We cannot use the `struct variables'
717 from the dictionary we created, because the caller owns the
718 dictionary and may destroy or modify its variables. */
722 r->var_cnt = dict_get_var_cnt (*dict);
723 r->vars = xnmalloc (r->var_cnt, sizeof *r->vars);
724 for (i = 0; i < r->var_cnt; i++)
726 struct variable *v = dict_get_var (*dict, i);
727 struct sfm_var *sv = &r->vars[i];
728 sv->width = v->width;
734 hsh_destroy(short_to_long);
739 /* Come here on unsuccessful completion. */
740 sfm_close_reader (r);
742 hsh_destroy(short_to_long);
746 dict_destroy (*dict);
752 /* Read record type 7, subtype 3. */
754 read_machine_int32_info (struct sfm_reader *r, int size, int count)
761 if (size != sizeof (int32_t) || count != 8)
762 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
763 "subtype 3. Expected size %d, count 8."),
764 fh_get_file_name (r->fh), size, count, sizeof (int32_t)));
766 assertive_buf_read (r, data, sizeof data, 0);
767 if (r->reverse_endian)
768 for (i = 0; i < 8; i++)
769 bswap_int32 (&data[i]);
773 lose ((ME, _("%s: Floating-point representation in system file is not "
774 "IEEE-754. PSPP cannot convert between floating-point "
776 fh_get_file_name (r->fh)));
778 #error Add support for your floating-point format.
781 #ifdef WORDS_BIGENDIAN
786 if (r->reverse_endian)
788 if (file_bigendian ^ (data[6] == 1))
789 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
790 "endianness intuited from file header (%s)."),
791 fh_get_file_name (r->fh),
792 file_bigendian ? _("big-endian") : _("little-endian"),
793 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
796 /* PORTME: Character representation code. */
797 if (data[7] != 2 && data[7] != 3)
798 lose ((ME, _("%s: File-indicated character representation code (%s) is "
800 fh_get_file_name (r->fh),
801 (data[7] == 1 ? "EBCDIC"
802 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
810 /* Read record type 7, subtype 4. */
812 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
817 if (size != sizeof (flt64) || count != 3)
818 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
819 "subtype 4. Expected size %d, count 8."),
820 fh_get_file_name (r->fh), size, count, sizeof (flt64)));
822 assertive_buf_read (r, data, sizeof data, 0);
823 if (r->reverse_endian)
824 for (i = 0; i < 3; i++)
825 bswap_flt64 (&data[i]);
827 if (data[0] != SYSMIS || data[1] != FLT64_MAX
828 || data[2] != second_lowest_flt64)
831 r->highest = data[1];
833 msg (MW, _("%s: File-indicated value is different from internal value "
834 "for at least one of the three system values. SYSMIS: "
835 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
837 fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS,
838 (double) data[1], (double) FLT64_MAX,
839 (double) data[2], (double) second_lowest_flt64);
849 read_header (struct sfm_reader *r,
850 struct dictionary *dict, struct sfm_read_info *info)
852 struct sysfile_header hdr; /* Disk buffer. */
853 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
854 int skip_amt = 0; /* Amount of product name to omit. */
857 /* Read header, check magic. */
858 assertive_buf_read (r, &hdr, sizeof hdr, 0);
859 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
860 lose ((ME, _("%s: Bad magic. Proper system files begin with "
861 "the four characters `$FL2'. This file will not be read."),
862 fh_get_file_name (r->fh)));
864 /* Check eye-category.her string. */
865 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
866 for (i = 0; i < 60; i++)
867 if (!c_isprint ((unsigned char) prod_name[i]))
869 for (i = 59; i >= 0; i--)
870 if (!c_isgraph ((unsigned char) prod_name[i]))
875 prod_name[60] = '\0';
879 static const char *prefix[N_PREFIXES] =
881 "@(#) SPSS DATA FILE",
887 for (i = 0; i < N_PREFIXES; i++)
888 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
890 skip_amt = strlen (prefix[i]);
895 /* Check endianness. */
896 if (hdr.layout_code == 2)
897 r->reverse_endian = 0;
900 bswap_int32 (&hdr.layout_code);
901 if (hdr.layout_code != 2)
902 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
903 "should be 2, in big-endian or little-endian format."),
904 fh_get_file_name (r->fh), hdr.layout_code));
906 r->reverse_endian = 1;
907 bswap_int32 (&hdr.nominal_case_size);
908 bswap_int32 (&hdr.compress);
909 bswap_int32 (&hdr.weight_idx);
910 bswap_int32 (&hdr.case_cnt);
911 bswap_flt64 (&hdr.bias);
915 /* Copy basic info and verify correctness. */
916 r->value_cnt = hdr.nominal_case_size;
918 /* If value count is ridiculous, then force it to -1 (a
920 if ( r->value_cnt < 0 ||
921 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
924 r->compressed = hdr.compress;
926 r->weight_idx = hdr.weight_idx - 1;
928 r->case_cnt = hdr.case_cnt;
929 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
931 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
932 fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2));
935 if (r->bias != 100.0)
936 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
938 fh_get_file_name (r->fh), r->bias);
940 /* Make a file label only on the condition that the given label is
941 not all spaces or nulls. */
945 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
947 if (!c_isspace ((unsigned char) hdr.file_label[i])
948 && hdr.file_label[i] != 0)
950 char *label = xmalloc (i + 2);
951 memcpy (label, hdr.file_label, i + 1);
953 dict_set_label (dict, label);
964 memcpy (info->creation_date, hdr.creation_date, 9);
965 info->creation_date[9] = 0;
967 memcpy (info->creation_time, hdr.creation_time, 8);
968 info->creation_time[8] = 0;
970 #ifdef WORDS_BIGENDIAN
971 info->big_endian = !r->reverse_endian;
973 info->big_endian = r->reverse_endian;
976 info->compressed = hdr.compress;
978 info->case_cnt = hdr.case_cnt;
980 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
981 if (c_isgraph ((unsigned char) *cp))
983 strcpy (info->product, cp);
992 /* Reads most of the dictionary from file H; also fills in the
993 associated VAR_BY_IDX array. */
995 read_variables (struct sfm_reader *r,
996 struct dictionary *dict, struct variable ***var_by_idx)
1000 struct sysfile_variable sv; /* Disk buffer. */
1001 int long_string_count = 0; /* # of long string continuation
1002 records still expected. */
1003 int next_value = 0; /* Index to next `value' structure. */
1010 /* Read in the entry for each variable and use the info to
1011 initialize the dictionary. */
1014 struct variable *vv;
1015 char name[SHORT_NAME_LEN + 1];
1019 assertive_buf_read (r, &sv, sizeof sv, 0);
1021 if (r->reverse_endian)
1023 bswap_int32 (&sv.rec_type);
1024 bswap_int32 (&sv.type);
1025 bswap_int32 (&sv.has_var_label);
1026 bswap_int32 (&sv.n_missing_values);
1027 bswap_int32 (&sv.print);
1028 bswap_int32 (&sv.write);
1031 /* We've come to the end of the variable entries */
1032 if (sv.rec_type != 2)
1034 buf_unread(r, sizeof sv);
1039 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
1041 /* If there was a long string previously, make sure that the
1042 continuations are present; otherwise make sure there aren't
1044 if (long_string_count)
1047 lose ((ME, _("%s: position %d: String variable does not have "
1048 "proper number of continuation records."),
1049 fh_get_file_name (r->fh), i));
1052 (*var_by_idx)[i] = NULL;
1053 long_string_count--;
1056 else if (sv.type == -1)
1057 lose ((ME, _("%s: position %d: Superfluous long string continuation "
1059 fh_get_file_name (r->fh), i));
1061 /* Check fields for validity. */
1062 if (sv.type < 0 || sv.type > 255)
1063 lose ((ME, _("%s: position %d: Bad variable type code %d."),
1064 fh_get_file_name (r->fh), i, sv.type));
1065 if (sv.has_var_label != 0 && sv.has_var_label != 1)
1066 lose ((ME, _("%s: position %d: Variable label indicator field is not "
1067 "0 or 1."), fh_get_file_name (r->fh), i));
1068 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
1069 || sv.n_missing_values == -1)
1070 lose ((ME, _("%s: position %d: Missing value indicator field is not "
1071 "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i));
1073 /* Copy first character of variable name. */
1074 if (sv.name[0] == '@' || sv.name[0] == '#')
1075 lose ((ME, _("%s: position %d: Variable name begins with invalid "
1077 fh_get_file_name (r->fh), i));
1079 name[0] = sv.name[0];
1081 /* Copy remaining characters of variable name. */
1082 for (j = 1; j < SHORT_NAME_LEN; j++)
1084 int c = (unsigned char) sv.name[j];
1093 if ( ! var_is_plausible_name(name, false) )
1094 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
1095 fh_get_file_name (r->fh), name));
1097 /* Create variable. */
1098 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
1100 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
1101 fh_get_file_name (r->fh), name));
1103 /* Set the short name the same as the long name */
1104 var_set_short_name (vv, vv->name);
1106 /* Case reading data. */
1107 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
1108 long_string_count = nv - 1;
1111 /* Get variable label, if any. */
1112 if (sv.has_var_label == 1)
1117 /* Read length of label. */
1118 assertive_buf_read (r, &len, sizeof len, 0);
1119 if (r->reverse_endian)
1123 if (len < 0 || len > 255)
1124 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
1126 fh_get_file_name (r->fh), vv->name, len));
1130 /* Read label into variable structure. */
1131 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
1132 if (vv->label == NULL)
1134 vv->label[len] = '\0';
1138 /* Set missing values. */
1139 if (sv.n_missing_values != 0)
1142 int mv_cnt = abs (sv.n_missing_values);
1144 if (vv->width > MAX_SHORT_STRING)
1145 lose ((ME, _("%s: Long string variable %s may not have missing "
1147 fh_get_file_name (r->fh), vv->name));
1149 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
1151 if (r->reverse_endian && vv->type == NUMERIC)
1152 for (j = 0; j < mv_cnt; j++)
1153 bswap_flt64 (&mv[j]);
1155 if (sv.n_missing_values > 0)
1157 for (j = 0; j < sv.n_missing_values; j++)
1158 if (vv->type == NUMERIC)
1159 mv_add_num (&vv->miss, mv[j]);
1161 mv_add_str (&vv->miss, (char *) &mv[j]);
1165 if (vv->type == ALPHA)
1166 lose ((ME, _("%s: String variable %s may not have missing "
1167 "values specified as a range."),
1168 fh_get_file_name (r->fh), vv->name));
1170 if (mv[0] == r->lowest)
1171 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
1172 else if (mv[1] == r->highest)
1173 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
1175 mv_add_num_range (&vv->miss, mv[0], mv[1]);
1177 if (sv.n_missing_values == -3)
1178 mv_add_num (&vv->miss, mv[2]);
1182 if (!parse_format_spec (r, sv.print, &vv->print, vv)
1183 || !parse_format_spec (r, sv.write, &vv->write, vv))
1187 /* Some consistency checks. */
1188 if (long_string_count != 0)
1189 lose ((ME, _("%s: Long string continuation records omitted at end of "
1191 fh_get_file_name (r->fh)));
1193 if (next_value != r->value_cnt)
1194 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
1195 "%d were read from file."),
1196 fh_get_file_name (r->fh), r->value_cnt, next_value);
1205 /* Translates the format spec from sysfile format to internal
1208 parse_format_spec (struct sfm_reader *r, int32_t s,
1209 struct fmt_spec *f, const struct variable *v)
1213 if (!fmt_from_io ((s >> 16) & 0xff, &f->type))
1214 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1215 fh_get_file_name (r->fh), (s >> 16) & 0xff));
1216 f->w = (s >> 8) & 0xff;
1219 if ((v->type == ALPHA) ^ (fmt_is_string (f->type) != 0))
1220 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1221 fh_get_file_name (r->fh),
1222 v->type == ALPHA ? _("String") : _("Numeric"),
1224 fmt_is_string (f->type) ? _("string") : _("numeric"),
1225 fmt_name (f->type)));
1228 ok = fmt_check_output (f) && fmt_check_width_compat (f, v->width);
1233 char fmt_string[FMT_STRING_LEN_MAX + 1];
1234 msg (ME, _("%s variable %s has invalid format specifier %s."),
1235 v->type == NUMERIC ? _("Numeric") : _("String"),
1236 v->name, fmt_to_string (f, fmt_string));
1237 *f = (v->type == NUMERIC
1238 ? fmt_for_output (FMT_F, 8, 2)
1239 : fmt_for_output (FMT_A, v->width, 0));
1247 /* Reads value labels from sysfile H and inserts them into the
1248 associated dictionary. */
1250 read_value_labels (struct sfm_reader *r,
1251 struct dictionary *dict, struct variable **var_by_idx)
1255 char raw_value[8]; /* Value as uninterpreted bytes. */
1256 union value value; /* Value. */
1257 char *label; /* Null-terminated label string. */
1260 struct label *labels = NULL;
1261 int32_t n_labels; /* Number of labels. */
1263 struct variable **var = NULL; /* Associated variables. */
1264 int32_t n_vars; /* Number of associated variables. */
1268 /* First step: read the contents of the type 3 record and record its
1269 contents. Note that we can't do much with the data since we
1270 don't know yet whether it is of numeric or string type. */
1272 /* Read number of labels. */
1273 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1274 if (r->reverse_endian)
1275 bswap_int32 (&n_labels);
1277 if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
1279 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1280 fh_get_file_name (r->fh), n_labels);
1284 /* Allocate memory. */
1285 labels = xcalloc (n_labels, sizeof *labels);
1286 for (i = 0; i < n_labels; i++)
1287 labels[i].label = NULL;
1289 /* Read each value/label tuple into labels[]. */
1290 for (i = 0; i < n_labels; i++)
1292 struct label *label = labels + i;
1293 unsigned char label_len;
1297 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1299 /* Read label length. */
1300 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1301 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1303 /* Read label, padding. */
1304 label->label = xmalloc (padded_len + 1);
1305 assertive_buf_read (r, label->label, padded_len - 1, 0);
1306 label->label[label_len] = 0;
1309 /* Second step: Read the type 4 record that has the list of
1310 variables to which the value labels are to be applied. */
1312 /* Read record type of type 4 record. */
1316 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1317 if (r->reverse_endian)
1318 bswap_int32 (&rec_type);
1321 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1322 "follow value label record (type 3) as it should."),
1323 fh_get_file_name (r->fh)));
1326 /* Read number of variables associated with value label from type 4
1328 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1329 if (r->reverse_endian)
1330 bswap_int32 (&n_vars);
1331 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1332 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1333 "is not between 1 and the number of variables (%d)."),
1334 fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict)));
1336 /* Read the list of variables. */
1337 var = xnmalloc (n_vars, sizeof *var);
1338 for (i = 0; i < n_vars; i++)
1343 /* Read variable index, check range. */
1344 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1345 if (r->reverse_endian)
1346 bswap_int32 (&var_idx);
1347 if (var_idx < 1 || var_idx > r->value_cnt)
1348 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1349 "not between 1 and the number of values (%d)."),
1350 fh_get_file_name (r->fh), var_idx, r->value_cnt));
1352 /* Make sure it's a real variable. */
1353 v = var_by_idx[var_idx - 1];
1355 lose ((ME, _("%s: Variable index associated with value label (%d) "
1356 "refers to a continuation of a string variable, not to "
1357 "an actual variable."),
1358 fh_get_file_name (r->fh), var_idx));
1359 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1360 lose ((ME, _("%s: Value labels are not allowed on long string "
1362 fh_get_file_name (r->fh), v->name));
1364 /* Add it to the list of variables. */
1368 /* Type check the variables. */
1369 for (i = 1; i < n_vars; i++)
1370 if (var[i]->type != var[0]->type)
1371 lose ((ME, _("%s: Variables associated with value label are not all of "
1372 "identical type. Variable %s has %s type, but variable "
1374 fh_get_file_name (r->fh),
1375 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1376 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1378 /* Fill in labels[].value, now that we know the desired type. */
1379 for (i = 0; i < n_labels; i++)
1381 struct label *label = labels + i;
1383 if (var[0]->type == ALPHA)
1385 const int copy_len = min (sizeof label->raw_value,
1386 sizeof label->label);
1387 memcpy (label->value.s, label->raw_value, copy_len);
1390 assert (sizeof f == sizeof label->raw_value);
1391 memcpy (&f, label->raw_value, sizeof f);
1392 if (r->reverse_endian)
1398 /* Assign the value_label's to each variable. */
1399 for (i = 0; i < n_vars; i++)
1401 struct variable *v = var[i];
1404 /* Add each label to the variable. */
1405 for (j = 0; j < n_labels; j++)
1407 struct label *label = labels + j;
1408 if (!val_labs_replace (v->val_labs, label->value, label->label))
1411 if (var[0]->type == NUMERIC)
1412 msg (MW, _("%s: File contains duplicate label for value %g for "
1414 fh_get_file_name (r->fh), label->value.f, v->name);
1416 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1417 "for variable %s."),
1418 fh_get_file_name (r->fh), v->width, label->value.s, v->name);
1422 for (i = 0; i < n_labels; i++)
1423 free (labels[i].label);
1431 for (i = 0; i < n_labels; i++)
1432 free (labels[i].label);
1439 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1440 non-NULL, uses that as the buffer; otherwise allocates at least
1441 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1444 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1448 if (buf == NULL && byte_cnt > 0 )
1449 buf = xmalloc (max (byte_cnt, min_alloc));
1451 if ( byte_cnt == 0 )
1455 if (1 != fread (buf, byte_cnt, 1, r->file))
1457 if (ferror (r->file))
1458 msg (ME, _("%s: Reading system file: %s."),
1459 fh_get_file_name (r->fh), strerror (errno));
1461 corrupt_msg (ME, _("%s: Unexpected end of file."),
1462 fh_get_file_name (r->fh));
1470 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1472 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1474 assert(byte_cnt > 0);
1476 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1478 msg (ME, _("%s: Seeking system file: %s."),
1479 fh_get_file_name (r->fh), strerror (errno));
1483 /* Reads a document record, type 6, from system file R, and sets up
1484 the documents and n_documents fields in the associated
1487 read_documents (struct sfm_reader *r, struct dictionary *dict)
1492 if (dict_get_documents (dict) != NULL)
1493 lose ((ME, _("%s: System file contains multiple "
1494 "type 6 (document) records."),
1495 fh_get_file_name (r->fh)));
1497 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1499 lose ((ME, _("%s: Number of document lines (%ld) "
1500 "must be greater than 0."),
1501 fh_get_file_name (r->fh), (long) line_cnt));
1503 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1504 /* FIXME? Run through asciify. */
1505 if (documents == NULL)
1507 documents[80 * line_cnt] = '\0';
1508 dict_set_documents (dict, documents);
1518 /* Reads compressed data into H->BUF and sets other pointers
1519 appropriately. Returns nonzero only if both no errors occur and
1522 buffer_input (struct sfm_reader *r)
1529 r->buf = xnmalloc (128, sizeof *r->buf);
1530 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1531 if (ferror (r->file))
1533 msg (ME, _("%s: Error reading file: %s."),
1534 fh_get_file_name (r->fh), strerror (errno));
1539 r->end = &r->buf[amt];
1543 /* Reads a single case consisting of compressed data from system
1544 file H into the array BUF[] according to reader R, and
1545 returns nonzero only if successful. */
1546 /* Data in system files is compressed in this manner. Data
1547 values are grouped into sets of eight ("octets"). Each value
1548 in an octet has one instruction byte that are output together.
1549 Each instruction byte gives a value for that byte or indicates
1550 that the value can be found following the instructions. */
1552 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1554 const unsigned char *p_end = r->x + sizeof (flt64);
1555 unsigned char *p = r->y;
1557 const flt64 *buf_beg = buf;
1558 const flt64 *buf_end = &buf[r->value_cnt];
1562 for (; p < p_end; p++){
1566 /* Code 0 is ignored. */
1569 /* Code 252 is end of file. */
1572 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1573 "in partial case."),
1574 fh_get_file_name (r->fh)));
1576 /* Code 253 indicates that the value is stored explicitly
1577 following the instruction bytes. */
1578 if (r->ptr == NULL || r->ptr >= r->end)
1579 if (!buffer_input (r))
1580 lose ((ME, _("%s: Unexpected end of file."),
1581 fh_get_file_name (r->fh)));
1582 memcpy (buf++, r->ptr++, sizeof *buf);
1587 /* Code 254 indicates a string that is all blanks. */
1588 memset (buf++, ' ', sizeof *buf);
1593 /* Code 255 indicates the system-missing value. */
1595 if (r->reverse_endian)
1602 /* Codes 1 through 251 inclusive are taken to indicate a
1603 value of (BYTE - BIAS), where BYTE is the byte's value
1604 and BIAS is the compression bias (generally 100.0). */
1605 *buf = *p - r->bias;
1606 if (r->reverse_endian)
1614 /* We have reached the end of this instruction octet. Read
1616 if (r->ptr == NULL || r->ptr >= r->end)
1618 if (!buffer_input (r))
1621 lose ((ME, _("%s: Unexpected end of file."),
1622 fh_get_file_name (r->fh)));
1627 memcpy (r->x, r->ptr++, sizeof *buf);
1634 /* We have filled up an entire record. Update state and return
1645 /* Reads one case from READER's file into C. Returns nonzero
1646 only if successful. */
1648 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1653 if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls)
1655 /* Fast path: external and internal representations are the
1656 same, except possibly for endianness or SYSMIS. Read
1657 directly into the case's buffer, then fix up any minor
1658 details as needed. */
1659 if (!fread_ok (r, case_data_all_rw (c),
1660 sizeof (union value) * r->value_cnt))
1663 /* Fix up endianness if needed. */
1664 if (r->reverse_endian)
1668 for (i = 0; i < r->var_cnt; i++)
1669 if (r->vars[i].width == 0)
1670 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1673 /* Fix up SYSMIS values if needed.
1674 I don't think this will ever actually kick in, but it
1676 if (r->sysmis != SYSMIS)
1680 for (i = 0; i < r->var_cnt; i++)
1681 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1682 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1687 /* Slow path: internal and external representations differ.
1688 Read into a bounce buffer, then copy to C. */
1695 bounce_size = sizeof *bounce * r->value_cnt;
1696 bounce = bounce_cur = local_alloc (bounce_size);
1698 memset(bounce, 0, bounce_size);
1701 read_ok = fread_ok (r, bounce, bounce_size);
1703 read_ok = read_compressed_data (r, bounce);
1706 local_free (bounce);
1710 for (i = 0; i < r->var_cnt; i++)
1712 struct sfm_var *sv = &r->vars[i];
1716 flt64 f = *bounce_cur++;
1717 if (r->reverse_endian)
1719 case_data_rw (c, sv->fv)->f = f == r->sysmis ? SYSMIS : f;
1723 flt64 *bc_start = bounce_cur;
1725 while (ofs < sv->width )
1727 const int chunk = MIN (MAX_LONG_STRING, sv->width - ofs);
1728 memcpy (case_data_rw (c, sv->fv)->s + ofs, bounce_cur, chunk);
1730 bounce_cur += DIV_RND_UP (chunk, sizeof (flt64));
1734 bounce_cur = bc_start + width_to_bytes(sv->width) / sizeof(flt64);
1738 local_free (bounce);
1744 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1746 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1748 if (read_bytes == byte_cnt)
1752 if (ferror (r->file))
1754 msg (ME, _("%s: Reading system file: %s."),
1755 fh_get_file_name (r->fh), strerror (errno));
1758 else if (read_bytes != 0)
1760 msg (ME, _("%s: Partial record at end of system file."),
1761 fh_get_file_name (r->fh));
1768 /* Returns true if an I/O error has occurred on READER, false
1771 sfm_read_error (const struct sfm_reader *reader)
1776 /* Returns true if FILE is an SPSS system file,
1779 sfm_detect (FILE *file)
1781 struct sysfile_header hdr;
1783 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1785 if (strncmp ("$FL2", hdr.rec_type, 4))