1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 #include <libpspp/alloc.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/message.h>
31 #include <libpspp/compiler.h>
32 #include <libpspp/magic.h>
33 #include <libpspp/misc.h>
34 #include <libpspp/str.h>
35 #include <libpspp/hash.h>
36 #include <libpspp/array.h>
38 #include "sys-file-reader.h"
39 #include "sfm-private.h"
41 #include "dictionary.h"
42 #include "file-handle-def.h"
43 #include "file-name.h"
45 #include "value-labels.h"
50 #define _(msgid) gettext (msgid)
52 /* System file reader. */
55 struct file_handle *fh; /* File handle. */
56 FILE *file; /* File stream. */
58 int reverse_endian; /* 1=file has endianness opposite us. */
59 int value_cnt; /* Number of `union values's per case. */
60 long case_cnt; /* Number of cases, -1 if unknown. */
61 int compressed; /* 1=compressed, 0=not compressed. */
62 double bias; /* Compression bias, usually 100.0. */
63 int weight_idx; /* 0-based index of weighting variable, or -1. */
64 bool ok; /* False after an I/O error or corrupt data. */
65 bool has_vls; /* True if the file has one or more Very Long Strings*/
71 /* File's special constants. */
76 /* Decompression buffer. */
77 flt64 *buf; /* Buffer data. */
78 flt64 *ptr; /* Current location in buffer. */
79 flt64 *end; /* End of buffer data. */
81 /* Compression instruction octet. */
82 unsigned char x[8]; /* Current instruction octet. */
83 unsigned char *y; /* Location in current instruction octet. */
86 /* A variable in a system file. */
89 int width; /* 0=numeric, otherwise string width. */
90 int fv; /* Index into case. */
95 /* Swap bytes *A and *B. */
97 bswap (char *a, char *b)
104 /* Reverse the byte order of 32-bit integer *X. */
106 bswap_int32 (int32_t *x_)
108 char *x = (char *) x_;
109 bswap (x + 0, x + 3);
110 bswap (x + 1, x + 2);
113 /* Reverse the byte order of 64-bit floating point *X. */
115 bswap_flt64 (flt64 *x_)
117 char *x = (char *) x_;
118 bswap (x + 0, x + 7);
119 bswap (x + 1, x + 6);
120 bswap (x + 2, x + 5);
121 bswap (x + 3, x + 4);
125 corrupt_msg (int class, const char *format,...)
126 PRINTF_FORMAT (2, 3);
128 /* Displays a corrupt sysfile error. */
130 corrupt_msg (int class, const char *format,...)
136 ds_init_cstr (&text, _("corrupt system file: "));
137 va_start (args, format);
138 ds_put_vformat (&text, format, args);
141 m.category = msg_class_to_category (class);
142 m.severity = msg_class_to_severity (class);
143 m.where.file_name = NULL;
144 m.where.line_number = 0;
145 m.text = ds_cstr (&text);
150 /* Closes a system file after we're done with it. */
152 sfm_close_reader (struct sfm_reader *r)
159 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
160 msg (ME, _("%s: Closing system file: %s."),
161 fh_get_file_name (r->fh), strerror (errno));
166 fh_close (r->fh, "system file", "rs");
173 /* Dictionary reader. */
175 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
177 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
180 static int read_header (struct sfm_reader *,
181 struct dictionary *, struct sfm_read_info *);
182 static int parse_format_spec (struct sfm_reader *, int32_t,
183 struct fmt_spec *, const struct variable *);
184 static int read_value_labels (struct sfm_reader *, struct dictionary *,
185 struct variable **var_by_idx);
186 static int read_variables (struct sfm_reader *,
187 struct dictionary *, struct variable ***var_by_idx);
188 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
189 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
190 static int read_documents (struct sfm_reader *, struct dictionary *);
192 static int fread_ok (struct sfm_reader *, void *, size_t);
194 /* Displays the message X with corrupt_msg, then jumps to the error
202 /* Calls buf_read with the specified arguments, and jumps to
203 error if the read fails. */
204 #define assertive_buf_read(a,b,c,d) \
206 if (!buf_read (a,b,c,d)) \
218 pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED)
222 const struct name_pair *p1 = _p1;
223 const struct name_pair *p2 = _p2;
225 char buf1[SHORT_NAME_LEN + 1];
226 char buf2[SHORT_NAME_LEN + 1];
228 memset(buf1, 0, SHORT_NAME_LEN + 1);
229 memset(buf2, 0, SHORT_NAME_LEN + 1);
231 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
233 buf1[i] = p1->shortname[i];
234 if ( '\0' == buf1[i])
238 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
240 buf2[i] = p2->shortname[i];
241 if ( '\0' == buf2[i])
245 return strncmp(buf1, buf2, SHORT_NAME_LEN);
249 pair_sn_hash(const void *_p, void *aux UNUSED)
252 const struct name_pair *p = _p;
253 char buf[SHORT_NAME_LEN + 1];
255 memset(buf, 0, SHORT_NAME_LEN + 1);
256 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
258 buf[i] = p->shortname[i];
263 return hsh_hash_bytes(buf, strlen(buf));
267 pair_sn_free(void *p, void *aux UNUSED)
274 /* Opens the system file designated by file handle FH for
275 reading. Reads the system file's dictionary into *DICT.
276 If INFO is non-null, then it receives additional info about the
279 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
280 struct sfm_read_info *info)
282 struct sfm_reader *r = NULL;
283 struct variable **var_by_idx = NULL;
285 /* The data in record 7(14) */
286 char *subrec14data = 0;
288 /* A hash table of long variable names indexed by short name */
289 struct hsh_table *short_to_long = NULL;
291 *dict = dict_create ();
292 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
295 /* Create and initialize reader. */
296 r = xmalloc (sizeof *r);
298 r->file = fn_open (fh_get_file_name (fh), "rb");
300 r->reverse_endian = 0;
311 r->sysmis = -FLT64_MAX;
312 r->highest = FLT64_MAX;
313 r->lowest = second_lowest_flt64;
315 r->buf = r->ptr = r->end = NULL;
316 r->y = r->x + sizeof r->x;
318 /* Check that file open succeeded. */
321 msg (ME, _("An error occurred while opening \"%s\" for reading "
322 "as a system file: %s."),
323 fh_get_file_name (r->fh), strerror (errno));
327 /* Read header and variables. */
328 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
332 /* Handle weighting. */
333 if (r->weight_idx != -1)
335 struct variable *weight_var;
337 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
338 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
339 "and number of elements per case (%d)."),
340 fh_get_file_name (r->fh), r->weight_idx, r->value_cnt));
343 weight_var = var_by_idx[r->weight_idx];
345 if (weight_var == NULL)
347 _("%s: Weighting variable may not be a continuation of "
348 "a long string variable."), fh_get_file_name (fh)));
349 else if (weight_var->type == ALPHA)
350 lose ((ME, _("%s: Weighting variable may not be a string variable."),
351 fh_get_file_name (fh)));
353 dict_set_weight (*dict, weight_var);
356 dict_set_weight (*dict, NULL);
358 /* Read records of types 3, 4, 6, and 7. */
363 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
364 if (r->reverse_endian)
365 bswap_int32 (&rec_type);
371 if (!read_value_labels (r, *dict, var_by_idx))
376 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
377 "records must always immediately follow type 3 "
379 fh_get_file_name (r->fh)));
382 if (!read_documents (r, *dict))
399 assertive_buf_read (r, &data, sizeof data, 0);
400 if (r->reverse_endian)
402 bswap_int32 (&data.subtype);
403 bswap_int32 (&data.size);
404 bswap_int32 (&data.count);
406 bytes = data.size * data.count;
408 if (bytes < data.size || bytes < data.count)
409 lose ((ME, "%s: Record type %d subtype %d too large.",
410 fh_get_file_name (r->fh), rec_type, data.subtype));
412 switch (data.subtype)
415 if (!read_machine_int32_info (r, data.size, data.count))
420 if (!read_machine_flt64_info (r, data.size, data.count))
425 case 6: /* ?? Used by SPSS 8.0. */
429 case 11: /* Variable display parameters */
431 const int n_vars = data.count / 3 ;
433 if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) )
435 msg (MW, _("%s: Invalid subrecord length. "
436 "Record: 7; Subrecord: 11"),
437 fh_get_file_name (r->fh));
442 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
454 assertive_buf_read (r, ¶ms, sizeof(params), 0);
456 if ( ! measure_is_valid(params.measure)
458 ! alignment_is_valid(params.align))
461 _("%s: Invalid variable display parameters. Default parameters substituted."),
462 fh_get_file_name(r->fh));
466 v = dict_get_var(*dict, i);
468 v->measure = params.measure;
469 v->display_width = params.width;
470 v->alignment = params.align;
475 case 13: /* SPSS 12.0 Long variable name map */
477 char *short_name, *save_ptr;
481 subrec14data = xmalloc (bytes + 1);
482 if (!buf_read (r, subrec14data, bytes, 0))
486 subrec14data[bytes] = '\0';
488 short_to_long = hsh_create(4,
495 for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
497 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
499 struct name_pair *pair ;
500 char *long_name = strtok_r (NULL, "\t", &save_ptr);
503 /* Validate long name. */
504 if (long_name == NULL)
506 msg (MW, _("%s: Trailing garbage in long variable "
508 fh_get_file_name (r->fh));
511 if (!var_is_valid_name (long_name, false))
513 msg (MW, _("%s: Long variable mapping to invalid "
514 "variable name `%s'."),
515 fh_get_file_name (r->fh), long_name);
519 /* Find variable using short name. */
520 v = dict_lookup_var (*dict, short_name);
523 msg (MW, _("%s: Long variable mapping for "
524 "nonexistent variable %s."),
525 fh_get_file_name (r->fh), short_name);
529 /* Identify any duplicates. */
530 if ( compare_var_names(short_name, long_name, 0) &&
531 NULL != dict_lookup_var (*dict, long_name))
532 lose ((ME, _("%s: Duplicate long variable name `%s' "
533 "within system file."),
534 fh_get_file_name (r->fh), long_name));
538 Renaming a variable may clear the short
539 name, but we want to retain it, so
540 re-set it explicitly. */
541 dict_rename_var (*dict, v, long_name);
542 var_set_short_name (v, short_name);
544 pair = xmalloc(sizeof *pair);
545 pair->shortname = short_name;
546 pair->longname = long_name;
547 hsh_insert(short_to_long, pair);
549 /* This messes up the processing of subtype 14 (below).
550 I'm not sure if it is needed anyway, so I'm removing it for
551 now. If it's needed, then it will need to be done after all the
552 records have been processed. --- JMD 27 April 2006
555 /* For compatibility, make sure dictionary
556 is in long variable name map order. In
557 the common case, this has no effect,
558 because the dictionary and the long
559 variable name map are already in the
561 dict_reorder_var (*dict, v, idx);
571 bool eq_seen = false;
575 char *buffer = xmalloc (bytes + 1);
576 if (!buf_read (r, buffer, bytes, 0))
581 buffer[bytes] = '\0';
585 /* Note: SPSS v13 terminates this record with 00,
586 whereas SPSS v14 terminates it with 00 09. We must
588 for(i = 0; i < bytes ; ++i)
591 static char name[SHORT_NAME_LEN + 1] = {0};
592 static char len_str[6] ={0};
601 length = strtol(len_str, 0, 10);
602 if ( length != LONG_MAX && length != LONG_MIN)
604 char *lookup_name = name;
611 struct name_pair pair;
614 pair.shortname = name;
615 p = hsh_find(short_to_long, &pair);
617 lookup_name = p->longname;
620 v = dict_lookup_var(*dict, lookup_name);
624 _("%s: No variable called %s but it is listed in length table."),
625 fh_get_file_name (r->fh), lookup_name);
632 if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
633 l -= EFFECTIVE_LONG_STRING_LENGTH;
640 struct variable *v_next;
641 v_next = dict_get_var(*dict, idx + 1);
643 if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
644 l -= EFFECTIVE_LONG_STRING_LENGTH;
648 dict_delete_var(*dict, v_next);
651 assert ( length > MAX_LONG_STRING );
654 v->print.w = v->width;
655 v->write.w = v->width;
656 v->nv = DIV_RND_UP (length, MAX_SHORT_STRING);
659 memset(name, 0, SHORT_NAME_LEN+1);
660 memset(len_str, 0, 6);
667 len_str[j] = buffer[i];
675 dict_compact_values(*dict);
680 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
681 "encountered in system file."),
682 fh_get_file_name (r->fh), data.subtype);
688 void *x = buf_read (r, NULL, data.size * data.count, 0);
700 assertive_buf_read (r, &filler, sizeof filler, 0);
706 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
707 fh_get_file_name (r->fh), rec_type);
712 /* Come here on successful completion. */
714 /* Create an index of dictionary variable widths for
715 sfm_read_case to use. We cannot use the `struct variables'
716 from the dictionary we created, because the caller owns the
717 dictionary and may destroy or modify its variables. */
721 r->var_cnt = dict_get_var_cnt (*dict);
722 r->vars = xnmalloc (r->var_cnt, sizeof *r->vars);
723 for (i = 0; i < r->var_cnt; i++)
725 struct variable *v = dict_get_var (*dict, i);
726 struct sfm_var *sv = &r->vars[i];
727 sv->width = v->width;
733 hsh_destroy(short_to_long);
738 /* Come here on unsuccessful completion. */
739 sfm_close_reader (r);
741 hsh_destroy(short_to_long);
745 dict_destroy (*dict);
751 /* Read record type 7, subtype 3. */
753 read_machine_int32_info (struct sfm_reader *r, int size, int count)
760 if (size != sizeof (int32_t) || count != 8)
761 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
762 "subtype 3. Expected size %d, count 8."),
763 fh_get_file_name (r->fh), size, count, sizeof (int32_t)));
765 assertive_buf_read (r, data, sizeof data, 0);
766 if (r->reverse_endian)
767 for (i = 0; i < 8; i++)
768 bswap_int32 (&data[i]);
772 lose ((ME, _("%s: Floating-point representation in system file is not "
773 "IEEE-754. PSPP cannot convert between floating-point "
775 fh_get_file_name (r->fh)));
777 #error Add support for your floating-point format.
780 #ifdef WORDS_BIGENDIAN
785 if (r->reverse_endian)
787 if (file_bigendian ^ (data[6] == 1))
788 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
789 "endianness intuited from file header (%s)."),
790 fh_get_file_name (r->fh),
791 file_bigendian ? _("big-endian") : _("little-endian"),
792 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
795 /* PORTME: Character representation code. */
796 if (data[7] != 2 && data[7] != 3)
797 lose ((ME, _("%s: File-indicated character representation code (%s) is "
799 fh_get_file_name (r->fh),
800 (data[7] == 1 ? "EBCDIC"
801 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
809 /* Read record type 7, subtype 4. */
811 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
816 if (size != sizeof (flt64) || count != 3)
817 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
818 "subtype 4. Expected size %d, count 8."),
819 fh_get_file_name (r->fh), size, count, sizeof (flt64)));
821 assertive_buf_read (r, data, sizeof data, 0);
822 if (r->reverse_endian)
823 for (i = 0; i < 3; i++)
824 bswap_flt64 (&data[i]);
826 if (data[0] != SYSMIS || data[1] != FLT64_MAX
827 || data[2] != second_lowest_flt64)
830 r->highest = data[1];
832 msg (MW, _("%s: File-indicated value is different from internal value "
833 "for at least one of the three system values. SYSMIS: "
834 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
836 fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS,
837 (double) data[1], (double) FLT64_MAX,
838 (double) data[2], (double) second_lowest_flt64);
848 read_header (struct sfm_reader *r,
849 struct dictionary *dict, struct sfm_read_info *info)
851 struct sysfile_header hdr; /* Disk buffer. */
852 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
853 int skip_amt = 0; /* Amount of product name to omit. */
856 /* Read header, check magic. */
857 assertive_buf_read (r, &hdr, sizeof hdr, 0);
858 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
859 lose ((ME, _("%s: Bad magic. Proper system files begin with "
860 "the four characters `$FL2'. This file will not be read."),
861 fh_get_file_name (r->fh)));
863 /* Check eye-category.her string. */
864 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
865 for (i = 0; i < 60; i++)
866 if (!c_isprint ((unsigned char) prod_name[i]))
868 for (i = 59; i >= 0; i--)
869 if (!c_isgraph ((unsigned char) prod_name[i]))
874 prod_name[60] = '\0';
878 static const char *prefix[N_PREFIXES] =
880 "@(#) SPSS DATA FILE",
886 for (i = 0; i < N_PREFIXES; i++)
887 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
889 skip_amt = strlen (prefix[i]);
894 /* Check endianness. */
895 if (hdr.layout_code == 2)
896 r->reverse_endian = 0;
899 bswap_int32 (&hdr.layout_code);
900 if (hdr.layout_code != 2)
901 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
902 "should be 2, in big-endian or little-endian format."),
903 fh_get_file_name (r->fh), hdr.layout_code));
905 r->reverse_endian = 1;
906 bswap_int32 (&hdr.nominal_case_size);
907 bswap_int32 (&hdr.compress);
908 bswap_int32 (&hdr.weight_idx);
909 bswap_int32 (&hdr.case_cnt);
910 bswap_flt64 (&hdr.bias);
914 /* Copy basic info and verify correctness. */
915 r->value_cnt = hdr.nominal_case_size;
917 /* If value count is ridiculous, then force it to -1 (a
919 if ( r->value_cnt < 0 ||
920 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
923 r->compressed = hdr.compress;
925 r->weight_idx = hdr.weight_idx - 1;
927 r->case_cnt = hdr.case_cnt;
928 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
930 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
931 fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2));
934 if (r->bias != 100.0)
935 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
937 fh_get_file_name (r->fh), r->bias);
939 /* Make a file label only on the condition that the given label is
940 not all spaces or nulls. */
944 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
946 if (!c_isspace ((unsigned char) hdr.file_label[i])
947 && hdr.file_label[i] != 0)
949 char *label = xmalloc (i + 2);
950 memcpy (label, hdr.file_label, i + 1);
952 dict_set_label (dict, label);
963 memcpy (info->creation_date, hdr.creation_date, 9);
964 info->creation_date[9] = 0;
966 memcpy (info->creation_time, hdr.creation_time, 8);
967 info->creation_time[8] = 0;
969 #ifdef WORDS_BIGENDIAN
970 info->big_endian = !r->reverse_endian;
972 info->big_endian = r->reverse_endian;
975 info->compressed = hdr.compress;
977 info->case_cnt = hdr.case_cnt;
979 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
980 if (c_isgraph ((unsigned char) *cp))
982 strcpy (info->product, cp);
991 /* Reads most of the dictionary from file H; also fills in the
992 associated VAR_BY_IDX array. */
994 read_variables (struct sfm_reader *r,
995 struct dictionary *dict, struct variable ***var_by_idx)
999 struct sysfile_variable sv; /* Disk buffer. */
1000 int long_string_count = 0; /* # of long string continuation
1001 records still expected. */
1002 int next_value = 0; /* Index to next `value' structure. */
1009 /* Read in the entry for each variable and use the info to
1010 initialize the dictionary. */
1013 struct variable *vv;
1014 char name[SHORT_NAME_LEN + 1];
1018 assertive_buf_read (r, &sv, sizeof sv, 0);
1020 if (r->reverse_endian)
1022 bswap_int32 (&sv.rec_type);
1023 bswap_int32 (&sv.type);
1024 bswap_int32 (&sv.has_var_label);
1025 bswap_int32 (&sv.n_missing_values);
1026 bswap_int32 (&sv.print);
1027 bswap_int32 (&sv.write);
1030 /* We've come to the end of the variable entries */
1031 if (sv.rec_type != 2)
1033 buf_unread(r, sizeof sv);
1038 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
1040 /* If there was a long string previously, make sure that the
1041 continuations are present; otherwise make sure there aren't
1043 if (long_string_count)
1046 lose ((ME, _("%s: position %d: String variable does not have "
1047 "proper number of continuation records."),
1048 fh_get_file_name (r->fh), i));
1051 (*var_by_idx)[i] = NULL;
1052 long_string_count--;
1055 else if (sv.type == -1)
1056 lose ((ME, _("%s: position %d: Superfluous long string continuation "
1058 fh_get_file_name (r->fh), i));
1060 /* Check fields for validity. */
1061 if (sv.type < 0 || sv.type > 255)
1062 lose ((ME, _("%s: position %d: Bad variable type code %d."),
1063 fh_get_file_name (r->fh), i, sv.type));
1064 if (sv.has_var_label != 0 && sv.has_var_label != 1)
1065 lose ((ME, _("%s: position %d: Variable label indicator field is not "
1066 "0 or 1."), fh_get_file_name (r->fh), i));
1067 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
1068 || sv.n_missing_values == -1)
1069 lose ((ME, _("%s: position %d: Missing value indicator field is not "
1070 "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i));
1072 /* Copy first character of variable name. */
1073 if (sv.name[0] == '@' || sv.name[0] == '#')
1074 lose ((ME, _("%s: position %d: Variable name begins with invalid "
1076 fh_get_file_name (r->fh), i));
1078 name[0] = sv.name[0];
1080 /* Copy remaining characters of variable name. */
1081 for (j = 1; j < SHORT_NAME_LEN; j++)
1083 int c = (unsigned char) sv.name[j];
1092 if ( ! var_is_plausible_name(name, false) )
1093 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
1094 fh_get_file_name (r->fh), name));
1096 /* Create variable. */
1097 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
1099 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
1100 fh_get_file_name (r->fh), name));
1102 /* Set the short name the same as the long name */
1103 var_set_short_name (vv, vv->name);
1105 /* Case reading data. */
1106 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
1107 long_string_count = nv - 1;
1110 /* Get variable label, if any. */
1111 if (sv.has_var_label == 1)
1116 /* Read length of label. */
1117 assertive_buf_read (r, &len, sizeof len, 0);
1118 if (r->reverse_endian)
1122 if (len < 0 || len > 255)
1123 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
1125 fh_get_file_name (r->fh), vv->name, len));
1129 /* Read label into variable structure. */
1130 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
1131 if (vv->label == NULL)
1133 vv->label[len] = '\0';
1137 /* Set missing values. */
1138 if (sv.n_missing_values != 0)
1141 int mv_cnt = abs (sv.n_missing_values);
1143 if (vv->width > MAX_SHORT_STRING)
1144 lose ((ME, _("%s: Long string variable %s may not have missing "
1146 fh_get_file_name (r->fh), vv->name));
1148 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
1150 if (r->reverse_endian && vv->type == NUMERIC)
1151 for (j = 0; j < mv_cnt; j++)
1152 bswap_flt64 (&mv[j]);
1154 if (sv.n_missing_values > 0)
1156 for (j = 0; j < sv.n_missing_values; j++)
1157 if (vv->type == NUMERIC)
1158 mv_add_num (&vv->miss, mv[j]);
1160 mv_add_str (&vv->miss, (char *) &mv[j]);
1164 if (vv->type == ALPHA)
1165 lose ((ME, _("%s: String variable %s may not have missing "
1166 "values specified as a range."),
1167 fh_get_file_name (r->fh), vv->name));
1169 if (mv[0] == r->lowest)
1170 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
1171 else if (mv[1] == r->highest)
1172 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
1174 mv_add_num_range (&vv->miss, mv[0], mv[1]);
1176 if (sv.n_missing_values == -3)
1177 mv_add_num (&vv->miss, mv[2]);
1181 if (!parse_format_spec (r, sv.print, &vv->print, vv)
1182 || !parse_format_spec (r, sv.write, &vv->write, vv))
1186 /* Some consistency checks. */
1187 if (long_string_count != 0)
1188 lose ((ME, _("%s: Long string continuation records omitted at end of "
1190 fh_get_file_name (r->fh)));
1192 if (next_value != r->value_cnt)
1193 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
1194 "%d were read from file."),
1195 fh_get_file_name (r->fh), r->value_cnt, next_value);
1204 /* Translates the format spec from sysfile format to internal
1207 parse_format_spec (struct sfm_reader *r, int32_t s,
1208 struct fmt_spec *f, const struct variable *v)
1210 f->type = translate_fmt ((s >> 16) & 0xff);
1212 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1213 fh_get_file_name (r->fh), (s >> 16) & 0xff));
1214 f->w = (s >> 8) & 0xff;
1217 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1218 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1219 fh_get_file_name (r->fh),
1220 v->type == ALPHA ? _("String") : _("Numeric"),
1222 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1223 formats[f->type].name));
1225 if (!check_output_specifier (f, false)
1226 || !check_specifier_width (f, v->width, false))
1228 msg (ME, _("%s variable %s has invalid format specifier %s."),
1229 v->type == NUMERIC ? _("Numeric") : _("String"),
1230 v->name, fmt_to_string (f));
1231 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1239 /* Reads value labels from sysfile H and inserts them into the
1240 associated dictionary. */
1242 read_value_labels (struct sfm_reader *r,
1243 struct dictionary *dict, struct variable **var_by_idx)
1247 char raw_value[8]; /* Value as uninterpreted bytes. */
1248 union value value; /* Value. */
1249 char *label; /* Null-terminated label string. */
1252 struct label *labels = NULL;
1253 int32_t n_labels; /* Number of labels. */
1255 struct variable **var = NULL; /* Associated variables. */
1256 int32_t n_vars; /* Number of associated variables. */
1260 /* First step: read the contents of the type 3 record and record its
1261 contents. Note that we can't do much with the data since we
1262 don't know yet whether it is of numeric or string type. */
1264 /* Read number of labels. */
1265 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1266 if (r->reverse_endian)
1267 bswap_int32 (&n_labels);
1269 if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
1271 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1272 fh_get_file_name (r->fh), n_labels);
1276 /* Allocate memory. */
1277 labels = xcalloc (n_labels, sizeof *labels);
1278 for (i = 0; i < n_labels; i++)
1279 labels[i].label = NULL;
1281 /* Read each value/label tuple into labels[]. */
1282 for (i = 0; i < n_labels; i++)
1284 struct label *label = labels + i;
1285 unsigned char label_len;
1289 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1291 /* Read label length. */
1292 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1293 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1295 /* Read label, padding. */
1296 label->label = xmalloc (padded_len + 1);
1297 assertive_buf_read (r, label->label, padded_len - 1, 0);
1298 label->label[label_len] = 0;
1301 /* Second step: Read the type 4 record that has the list of
1302 variables to which the value labels are to be applied. */
1304 /* Read record type of type 4 record. */
1308 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1309 if (r->reverse_endian)
1310 bswap_int32 (&rec_type);
1313 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1314 "follow value label record (type 3) as it should."),
1315 fh_get_file_name (r->fh)));
1318 /* Read number of variables associated with value label from type 4
1320 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1321 if (r->reverse_endian)
1322 bswap_int32 (&n_vars);
1323 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1324 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1325 "is not between 1 and the number of variables (%d)."),
1326 fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict)));
1328 /* Read the list of variables. */
1329 var = xnmalloc (n_vars, sizeof *var);
1330 for (i = 0; i < n_vars; i++)
1335 /* Read variable index, check range. */
1336 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1337 if (r->reverse_endian)
1338 bswap_int32 (&var_idx);
1339 if (var_idx < 1 || var_idx > r->value_cnt)
1340 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1341 "not between 1 and the number of values (%d)."),
1342 fh_get_file_name (r->fh), var_idx, r->value_cnt));
1344 /* Make sure it's a real variable. */
1345 v = var_by_idx[var_idx - 1];
1347 lose ((ME, _("%s: Variable index associated with value label (%d) "
1348 "refers to a continuation of a string variable, not to "
1349 "an actual variable."),
1350 fh_get_file_name (r->fh), var_idx));
1351 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1352 lose ((ME, _("%s: Value labels are not allowed on long string "
1354 fh_get_file_name (r->fh), v->name));
1356 /* Add it to the list of variables. */
1360 /* Type check the variables. */
1361 for (i = 1; i < n_vars; i++)
1362 if (var[i]->type != var[0]->type)
1363 lose ((ME, _("%s: Variables associated with value label are not all of "
1364 "identical type. Variable %s has %s type, but variable "
1366 fh_get_file_name (r->fh),
1367 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1368 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1370 /* Fill in labels[].value, now that we know the desired type. */
1371 for (i = 0; i < n_labels; i++)
1373 struct label *label = labels + i;
1375 if (var[0]->type == ALPHA)
1377 const int copy_len = min (sizeof label->raw_value,
1378 sizeof label->label);
1379 memcpy (label->value.s, label->raw_value, copy_len);
1382 assert (sizeof f == sizeof label->raw_value);
1383 memcpy (&f, label->raw_value, sizeof f);
1384 if (r->reverse_endian)
1390 /* Assign the value_label's to each variable. */
1391 for (i = 0; i < n_vars; i++)
1393 struct variable *v = var[i];
1396 /* Add each label to the variable. */
1397 for (j = 0; j < n_labels; j++)
1399 struct label *label = labels + j;
1400 if (!val_labs_replace (v->val_labs, label->value, label->label))
1403 if (var[0]->type == NUMERIC)
1404 msg (MW, _("%s: File contains duplicate label for value %g for "
1406 fh_get_file_name (r->fh), label->value.f, v->name);
1408 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1409 "for variable %s."),
1410 fh_get_file_name (r->fh), v->width, label->value.s, v->name);
1414 for (i = 0; i < n_labels; i++)
1415 free (labels[i].label);
1423 for (i = 0; i < n_labels; i++)
1424 free (labels[i].label);
1431 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1432 non-NULL, uses that as the buffer; otherwise allocates at least
1433 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1436 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1440 if (buf == NULL && byte_cnt > 0 )
1441 buf = xmalloc (max (byte_cnt, min_alloc));
1443 if ( byte_cnt == 0 )
1447 if (1 != fread (buf, byte_cnt, 1, r->file))
1449 if (ferror (r->file))
1450 msg (ME, _("%s: Reading system file: %s."),
1451 fh_get_file_name (r->fh), strerror (errno));
1453 corrupt_msg (ME, _("%s: Unexpected end of file."),
1454 fh_get_file_name (r->fh));
1462 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1464 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1466 assert(byte_cnt > 0);
1468 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1470 msg (ME, _("%s: Seeking system file: %s."),
1471 fh_get_file_name (r->fh), strerror (errno));
1475 /* Reads a document record, type 6, from system file R, and sets up
1476 the documents and n_documents fields in the associated
1479 read_documents (struct sfm_reader *r, struct dictionary *dict)
1484 if (dict_get_documents (dict) != NULL)
1485 lose ((ME, _("%s: System file contains multiple "
1486 "type 6 (document) records."),
1487 fh_get_file_name (r->fh)));
1489 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1491 lose ((ME, _("%s: Number of document lines (%ld) "
1492 "must be greater than 0."),
1493 fh_get_file_name (r->fh), (long) line_cnt));
1495 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1496 /* FIXME? Run through asciify. */
1497 if (documents == NULL)
1499 documents[80 * line_cnt] = '\0';
1500 dict_set_documents (dict, documents);
1510 /* Reads compressed data into H->BUF and sets other pointers
1511 appropriately. Returns nonzero only if both no errors occur and
1514 buffer_input (struct sfm_reader *r)
1521 r->buf = xnmalloc (128, sizeof *r->buf);
1522 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1523 if (ferror (r->file))
1525 msg (ME, _("%s: Error reading file: %s."),
1526 fh_get_file_name (r->fh), strerror (errno));
1531 r->end = &r->buf[amt];
1535 /* Reads a single case consisting of compressed data from system
1536 file H into the array BUF[] according to reader R, and
1537 returns nonzero only if successful. */
1538 /* Data in system files is compressed in this manner. Data
1539 values are grouped into sets of eight ("octets"). Each value
1540 in an octet has one instruction byte that are output together.
1541 Each instruction byte gives a value for that byte or indicates
1542 that the value can be found following the instructions. */
1544 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1546 const unsigned char *p_end = r->x + sizeof (flt64);
1547 unsigned char *p = r->y;
1549 const flt64 *buf_beg = buf;
1550 const flt64 *buf_end = &buf[r->value_cnt];
1554 for (; p < p_end; p++){
1558 /* Code 0 is ignored. */
1561 /* Code 252 is end of file. */
1564 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1565 "in partial case."),
1566 fh_get_file_name (r->fh)));
1568 /* Code 253 indicates that the value is stored explicitly
1569 following the instruction bytes. */
1570 if (r->ptr == NULL || r->ptr >= r->end)
1571 if (!buffer_input (r))
1572 lose ((ME, _("%s: Unexpected end of file."),
1573 fh_get_file_name (r->fh)));
1574 memcpy (buf++, r->ptr++, sizeof *buf);
1579 /* Code 254 indicates a string that is all blanks. */
1580 memset (buf++, ' ', sizeof *buf);
1585 /* Code 255 indicates the system-missing value. */
1587 if (r->reverse_endian)
1594 /* Codes 1 through 251 inclusive are taken to indicate a
1595 value of (BYTE - BIAS), where BYTE is the byte's value
1596 and BIAS is the compression bias (generally 100.0). */
1597 *buf = *p - r->bias;
1598 if (r->reverse_endian)
1606 /* We have reached the end of this instruction octet. Read
1608 if (r->ptr == NULL || r->ptr >= r->end)
1610 if (!buffer_input (r))
1613 lose ((ME, _("%s: Unexpected end of file."),
1614 fh_get_file_name (r->fh)));
1619 memcpy (r->x, r->ptr++, sizeof *buf);
1626 /* We have filled up an entire record. Update state and return
1637 /* Reads one case from READER's file into C. Returns nonzero
1638 only if successful. */
1640 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1645 if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls)
1647 /* Fast path: external and internal representations are the
1648 same, except possibly for endianness or SYSMIS. Read
1649 directly into the case's buffer, then fix up any minor
1650 details as needed. */
1651 if (!fread_ok (r, case_data_all_rw (c),
1652 sizeof (union value) * r->value_cnt))
1655 /* Fix up endianness if needed. */
1656 if (r->reverse_endian)
1660 for (i = 0; i < r->var_cnt; i++)
1661 if (r->vars[i].width == 0)
1662 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1665 /* Fix up SYSMIS values if needed.
1666 I don't think this will ever actually kick in, but it
1668 if (r->sysmis != SYSMIS)
1672 for (i = 0; i < r->var_cnt; i++)
1673 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1674 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1679 /* Slow path: internal and external representations differ.
1680 Read into a bounce buffer, then copy to C. */
1687 bounce_size = sizeof *bounce * r->value_cnt;
1688 bounce = bounce_cur = local_alloc (bounce_size);
1690 memset(bounce, 0, bounce_size);
1693 read_ok = fread_ok (r, bounce, bounce_size);
1695 read_ok = read_compressed_data (r, bounce);
1698 local_free (bounce);
1702 for (i = 0; i < r->var_cnt; i++)
1704 struct sfm_var *sv = &r->vars[i];
1708 flt64 f = *bounce_cur++;
1709 if (r->reverse_endian)
1711 case_data_rw (c, sv->fv)->f = f == r->sysmis ? SYSMIS : f;
1715 flt64 *bc_start = bounce_cur;
1717 while (ofs < sv->width )
1719 const int chunk = MIN (MAX_LONG_STRING, sv->width - ofs);
1720 memcpy (case_data_rw (c, sv->fv)->s + ofs, bounce_cur, chunk);
1722 bounce_cur += DIV_RND_UP (chunk, sizeof (flt64));
1726 bounce_cur = bc_start + width_to_bytes(sv->width) / sizeof(flt64);
1730 local_free (bounce);
1736 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1738 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1740 if (read_bytes == byte_cnt)
1744 if (ferror (r->file))
1746 msg (ME, _("%s: Reading system file: %s."),
1747 fh_get_file_name (r->fh), strerror (errno));
1750 else if (read_bytes != 0)
1752 msg (ME, _("%s: Partial record at end of system file."),
1753 fh_get_file_name (r->fh));
1760 /* Returns true if an I/O error has occurred on READER, false
1763 sfm_read_error (const struct sfm_reader *reader)
1768 /* Returns true if FILE is an SPSS system file,
1771 sfm_detect (FILE *file)
1773 struct sysfile_header hdr;
1775 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1777 if (strncmp ("$FL2", hdr.rec_type, 4))