1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 #include <libpspp/alloc.h>
29 #include <libpspp/message.h>
30 #include <libpspp/compiler.h>
31 #include <libpspp/magic.h>
32 #include <libpspp/misc.h>
33 #include <libpspp/str.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/array.h>
37 #include "sys-file-reader.h"
38 #include "sfm-private.h"
40 #include "dictionary.h"
41 #include "file-handle-def.h"
42 #include "file-name.h"
44 #include "value-labels.h"
49 #define _(msgid) gettext (msgid)
51 /* System file reader. */
54 struct file_handle *fh; /* File handle. */
55 FILE *file; /* File stream. */
57 int reverse_endian; /* 1=file has endianness opposite us. */
58 int value_cnt; /* Number of `union values's per case. */
59 long case_cnt; /* Number of cases, -1 if unknown. */
60 int compressed; /* 1=compressed, 0=not compressed. */
61 double bias; /* Compression bias, usually 100.0. */
62 int weight_idx; /* 0-based index of weighting variable, or -1. */
63 bool ok; /* False after an I/O error or corrupt data. */
64 bool has_vls; /* True if the file has one or more Very Long Strings*/
70 /* File's special constants. */
75 /* Decompression buffer. */
76 flt64 *buf; /* Buffer data. */
77 flt64 *ptr; /* Current location in buffer. */
78 flt64 *end; /* End of buffer data. */
80 /* Compression instruction octet. */
81 unsigned char x[8]; /* Current instruction octet. */
82 unsigned char *y; /* Location in current instruction octet. */
85 /* A variable in a system file. */
88 int width; /* 0=numeric, otherwise string width. */
89 int fv; /* Index into case. */
94 /* Swap bytes *A and *B. */
96 bswap (char *a, char *b)
103 /* Reverse the byte order of 32-bit integer *X. */
105 bswap_int32 (int32_t *x_)
107 char *x = (char *) x_;
108 bswap (x + 0, x + 3);
109 bswap (x + 1, x + 2);
112 /* Reverse the byte order of 64-bit floating point *X. */
114 bswap_flt64 (flt64 *x_)
116 char *x = (char *) x_;
117 bswap (x + 0, x + 7);
118 bswap (x + 1, x + 6);
119 bswap (x + 2, x + 5);
120 bswap (x + 3, x + 4);
124 corrupt_msg (int class, const char *format,...)
125 PRINTF_FORMAT (2, 3);
127 /* Displays a corrupt sysfile error. */
129 corrupt_msg (int class, const char *format,...)
135 ds_init_cstr (&text, _("corrupt system file: "));
136 va_start (args, format);
137 ds_put_vformat (&text, format, args);
140 m.category = msg_class_to_category (class);
141 m.severity = msg_class_to_severity (class);
142 m.where.file_name = NULL;
143 m.where.line_number = 0;
144 m.text = ds_cstr (&text);
149 /* Closes a system file after we're done with it. */
151 sfm_close_reader (struct sfm_reader *r)
158 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
159 msg (ME, _("%s: Closing system file: %s."),
160 fh_get_file_name (r->fh), strerror (errno));
165 fh_close (r->fh, "system file", "rs");
172 /* Dictionary reader. */
174 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
176 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
179 static int read_header (struct sfm_reader *,
180 struct dictionary *, struct sfm_read_info *);
181 static int parse_format_spec (struct sfm_reader *, int32_t,
182 struct fmt_spec *, const struct variable *);
183 static int read_value_labels (struct sfm_reader *, struct dictionary *,
184 struct variable **var_by_idx);
185 static int read_variables (struct sfm_reader *,
186 struct dictionary *, struct variable ***var_by_idx);
187 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
188 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
189 static int read_documents (struct sfm_reader *, struct dictionary *);
191 static int fread_ok (struct sfm_reader *, void *, size_t);
193 /* Displays the message X with corrupt_msg, then jumps to the error
201 /* Calls buf_read with the specified arguments, and jumps to
202 error if the read fails. */
203 #define assertive_buf_read(a,b,c,d) \
205 if (!buf_read (a,b,c,d)) \
217 pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED)
221 const struct name_pair *p1 = _p1;
222 const struct name_pair *p2 = _p2;
224 char buf1[SHORT_NAME_LEN + 1];
225 char buf2[SHORT_NAME_LEN + 1];
227 memset(buf1, 0, SHORT_NAME_LEN + 1);
228 memset(buf2, 0, SHORT_NAME_LEN + 1);
230 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
232 buf1[i] = p1->shortname[i];
233 if ( '\0' == buf1[i])
237 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
239 buf2[i] = p2->shortname[i];
240 if ( '\0' == buf2[i])
244 return strncmp(buf1, buf2, SHORT_NAME_LEN);
248 pair_sn_hash(const void *_p, void *aux UNUSED)
251 const struct name_pair *p = _p;
252 char buf[SHORT_NAME_LEN + 1];
254 memset(buf, 0, SHORT_NAME_LEN + 1);
255 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
257 buf[i] = p->shortname[i];
262 return hsh_hash_bytes(buf, strlen(buf));
266 pair_sn_free(void *p, void *aux UNUSED)
273 /* Opens the system file designated by file handle FH for
274 reading. Reads the system file's dictionary into *DICT.
275 If INFO is non-null, then it receives additional info about the
278 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
279 struct sfm_read_info *info)
281 struct sfm_reader *r = NULL;
282 struct variable **var_by_idx = NULL;
284 /* The data in record 7(14) */
285 char *subrec14data = 0;
287 /* A hash table of long variable names indexed by short name */
288 struct hsh_table *short_to_long = NULL;
290 *dict = dict_create ();
291 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
294 /* Create and initialize reader. */
295 r = xmalloc (sizeof *r);
297 r->file = fn_open (fh_get_file_name (fh), "rb");
299 r->reverse_endian = 0;
310 r->sysmis = -FLT64_MAX;
311 r->highest = FLT64_MAX;
312 r->lowest = second_lowest_flt64;
314 r->buf = r->ptr = r->end = NULL;
315 r->y = r->x + sizeof r->x;
317 /* Check that file open succeeded. */
320 msg (ME, _("An error occurred while opening \"%s\" for reading "
321 "as a system file: %s."),
322 fh_get_file_name (r->fh), strerror (errno));
326 /* Read header and variables. */
327 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
331 /* Handle weighting. */
332 if (r->weight_idx != -1)
334 struct variable *weight_var;
336 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
337 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
338 "and number of elements per case (%d)."),
339 fh_get_file_name (r->fh), r->weight_idx, r->value_cnt));
342 weight_var = var_by_idx[r->weight_idx];
344 if (weight_var == NULL)
346 _("%s: Weighting variable may not be a continuation of "
347 "a long string variable."), fh_get_file_name (fh)));
348 else if (weight_var->type == ALPHA)
349 lose ((ME, _("%s: Weighting variable may not be a string variable."),
350 fh_get_file_name (fh)));
352 dict_set_weight (*dict, weight_var);
355 dict_set_weight (*dict, NULL);
357 /* Read records of types 3, 4, 6, and 7. */
362 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
363 if (r->reverse_endian)
364 bswap_int32 (&rec_type);
370 if (!read_value_labels (r, *dict, var_by_idx))
375 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
376 "records must always immediately follow type 3 "
378 fh_get_file_name (r->fh)));
381 if (!read_documents (r, *dict))
398 assertive_buf_read (r, &data, sizeof data, 0);
399 if (r->reverse_endian)
401 bswap_int32 (&data.subtype);
402 bswap_int32 (&data.size);
403 bswap_int32 (&data.count);
405 bytes = data.size * data.count;
407 if (bytes < data.size || bytes < data.count)
408 lose ((ME, "%s: Record type %d subtype %d too large.",
409 fh_get_file_name (r->fh), rec_type, data.subtype));
411 switch (data.subtype)
414 if (!read_machine_int32_info (r, data.size, data.count))
419 if (!read_machine_flt64_info (r, data.size, data.count))
424 case 6: /* ?? Used by SPSS 8.0. */
428 case 11: /* Variable display parameters */
430 const int n_vars = data.count / 3 ;
432 if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) )
434 msg (MW, _("%s: Invalid subrecord length. "
435 "Record: 7; Subrecord: 11"),
436 fh_get_file_name (r->fh));
441 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
453 assertive_buf_read (r, ¶ms, sizeof(params), 0);
455 if ( ! measure_is_valid(params.measure)
457 ! alignment_is_valid(params.align))
460 _("%s: Invalid variable display parameters. Default parameters substituted."),
461 fh_get_file_name(r->fh));
465 v = dict_get_var(*dict, i);
467 v->measure = params.measure;
468 v->display_width = params.width;
469 v->alignment = params.align;
474 case 13: /* SPSS 12.0 Long variable name map */
476 char *short_name, *save_ptr;
480 subrec14data = xmalloc (bytes + 1);
481 if (!buf_read (r, subrec14data, bytes, 0))
485 subrec14data[bytes] = '\0';
487 short_to_long = hsh_create(4,
494 for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
496 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
498 struct name_pair *pair ;
499 char *long_name = strtok_r (NULL, "\t", &save_ptr);
502 /* Validate long name. */
503 if (long_name == NULL)
505 msg (MW, _("%s: Trailing garbage in long variable "
507 fh_get_file_name (r->fh));
510 if (!var_is_valid_name (long_name, false))
512 msg (MW, _("%s: Long variable mapping to invalid "
513 "variable name `%s'."),
514 fh_get_file_name (r->fh), long_name);
518 /* Find variable using short name. */
519 v = dict_lookup_var (*dict, short_name);
522 msg (MW, _("%s: Long variable mapping for "
523 "nonexistent variable %s."),
524 fh_get_file_name (r->fh), short_name);
528 /* Identify any duplicates. */
529 if ( compare_var_names(short_name, long_name, 0) &&
530 NULL != dict_lookup_var (*dict, long_name))
531 lose ((ME, _("%s: Duplicate long variable name `%s' "
532 "within system file."),
533 fh_get_file_name (r->fh), long_name));
537 Renaming a variable may clear the short
538 name, but we want to retain it, so
539 re-set it explicitly. */
540 dict_rename_var (*dict, v, long_name);
541 var_set_short_name (v, short_name);
543 pair = xmalloc(sizeof *pair);
544 pair->shortname = short_name;
545 pair->longname = long_name;
546 hsh_insert(short_to_long, pair);
548 /* This messes up the processing of subtype 14 (below).
549 I'm not sure if it is needed anyway, so I'm removing it for
550 now. If it's needed, then it will need to be done after all the
551 records have been processed. --- JMD 27 April 2006
554 /* For compatibility, make sure dictionary
555 is in long variable name map order. In
556 the common case, this has no effect,
557 because the dictionary and the long
558 variable name map are already in the
560 dict_reorder_var (*dict, v, idx);
570 bool eq_seen = false;
574 char *buffer = xmalloc (bytes + 1);
575 if (!buf_read (r, buffer, bytes, 0))
580 buffer[bytes] = '\0';
584 /* Note: SPSS v13 terminates this record with 00,
585 whereas SPSS v14 terminates it with 00 09. We must
587 for(i = 0; i < bytes ; ++i)
590 static char name[SHORT_NAME_LEN + 1] = {0};
591 static char len_str[6] ={0};
600 length = strtol(len_str, 0, 10);
601 if ( length != LONG_MAX && length != LONG_MIN)
603 char *lookup_name = name;
610 struct name_pair pair;
613 pair.shortname = name;
614 p = hsh_find(short_to_long, &pair);
616 lookup_name = p->longname;
619 v = dict_lookup_var(*dict, lookup_name);
623 _("%s: No variable called %s but it is listed in length table."),
624 fh_get_file_name (r->fh), lookup_name);
631 if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
632 l -= EFFECTIVE_LONG_STRING_LENGTH;
639 struct variable *v_next;
640 v_next = dict_get_var(*dict, idx + 1);
642 if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
643 l -= EFFECTIVE_LONG_STRING_LENGTH;
647 dict_delete_var(*dict, v_next);
650 assert ( length > MAX_LONG_STRING );
653 v->print.w = v->width;
654 v->write.w = v->width;
655 v->nv = DIV_RND_UP (length, MAX_SHORT_STRING);
658 memset(name, 0, SHORT_NAME_LEN+1);
659 memset(len_str, 0, 6);
666 len_str[j] = buffer[i];
674 dict_compact_values(*dict);
679 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
680 "encountered in system file."),
681 fh_get_file_name (r->fh), data.subtype);
687 void *x = buf_read (r, NULL, data.size * data.count, 0);
699 assertive_buf_read (r, &filler, sizeof filler, 0);
705 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
706 fh_get_file_name (r->fh), rec_type);
711 /* Come here on successful completion. */
713 /* Create an index of dictionary variable widths for
714 sfm_read_case to use. We cannot use the `struct variables'
715 from the dictionary we created, because the caller owns the
716 dictionary and may destroy or modify its variables. */
720 r->var_cnt = dict_get_var_cnt (*dict);
721 r->vars = xnmalloc (r->var_cnt, sizeof *r->vars);
722 for (i = 0; i < r->var_cnt; i++)
724 struct variable *v = dict_get_var (*dict, i);
725 struct sfm_var *sv = &r->vars[i];
726 sv->width = v->width;
732 hsh_destroy(short_to_long);
737 /* Come here on unsuccessful completion. */
738 sfm_close_reader (r);
740 hsh_destroy(short_to_long);
744 dict_destroy (*dict);
750 /* Read record type 7, subtype 3. */
752 read_machine_int32_info (struct sfm_reader *r, int size, int count)
759 if (size != sizeof (int32_t) || count != 8)
760 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
761 "subtype 3. Expected size %d, count 8."),
762 fh_get_file_name (r->fh), size, count, sizeof (int32_t)));
764 assertive_buf_read (r, data, sizeof data, 0);
765 if (r->reverse_endian)
766 for (i = 0; i < 8; i++)
767 bswap_int32 (&data[i]);
771 lose ((ME, _("%s: Floating-point representation in system file is not "
772 "IEEE-754. PSPP cannot convert between floating-point "
774 fh_get_file_name (r->fh)));
776 #error Add support for your floating-point format.
779 #ifdef WORDS_BIGENDIAN
784 if (r->reverse_endian)
786 if (file_bigendian ^ (data[6] == 1))
787 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
788 "endianness intuited from file header (%s)."),
789 fh_get_file_name (r->fh),
790 file_bigendian ? _("big-endian") : _("little-endian"),
791 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
794 /* PORTME: Character representation code. */
795 if (data[7] != 2 && data[7] != 3)
796 lose ((ME, _("%s: File-indicated character representation code (%s) is "
798 fh_get_file_name (r->fh),
799 (data[7] == 1 ? "EBCDIC"
800 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
808 /* Read record type 7, subtype 4. */
810 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
815 if (size != sizeof (flt64) || count != 3)
816 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
817 "subtype 4. Expected size %d, count 8."),
818 fh_get_file_name (r->fh), size, count, sizeof (flt64)));
820 assertive_buf_read (r, data, sizeof data, 0);
821 if (r->reverse_endian)
822 for (i = 0; i < 3; i++)
823 bswap_flt64 (&data[i]);
825 if (data[0] != SYSMIS || data[1] != FLT64_MAX
826 || data[2] != second_lowest_flt64)
829 r->highest = data[1];
831 msg (MW, _("%s: File-indicated value is different from internal value "
832 "for at least one of the three system values. SYSMIS: "
833 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
835 fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS,
836 (double) data[1], (double) FLT64_MAX,
837 (double) data[2], (double) second_lowest_flt64);
847 read_header (struct sfm_reader *r,
848 struct dictionary *dict, struct sfm_read_info *info)
850 struct sysfile_header hdr; /* Disk buffer. */
851 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
852 int skip_amt = 0; /* Amount of product name to omit. */
855 /* Read header, check magic. */
856 assertive_buf_read (r, &hdr, sizeof hdr, 0);
857 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
858 lose ((ME, _("%s: Bad magic. Proper system files begin with "
859 "the four characters `$FL2'. This file will not be read."),
860 fh_get_file_name (r->fh)));
862 /* Check eye-category.her string. */
863 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
864 for (i = 0; i < 60; i++)
865 if (!c_isprint ((unsigned char) prod_name[i]))
867 for (i = 59; i >= 0; i--)
868 if (!c_isgraph ((unsigned char) prod_name[i]))
873 prod_name[60] = '\0';
877 static const char *prefix[N_PREFIXES] =
879 "@(#) SPSS DATA FILE",
885 for (i = 0; i < N_PREFIXES; i++)
886 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
888 skip_amt = strlen (prefix[i]);
893 /* Check endianness. */
894 if (hdr.layout_code == 2)
895 r->reverse_endian = 0;
898 bswap_int32 (&hdr.layout_code);
899 if (hdr.layout_code != 2)
900 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
901 "should be 2, in big-endian or little-endian format."),
902 fh_get_file_name (r->fh), hdr.layout_code));
904 r->reverse_endian = 1;
905 bswap_int32 (&hdr.nominal_case_size);
906 bswap_int32 (&hdr.compress);
907 bswap_int32 (&hdr.weight_idx);
908 bswap_int32 (&hdr.case_cnt);
909 bswap_flt64 (&hdr.bias);
913 /* Copy basic info and verify correctness. */
914 r->value_cnt = hdr.nominal_case_size;
916 /* If value count is ridiculous, then force it to -1 (a
918 if ( r->value_cnt < 0 ||
919 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
922 r->compressed = hdr.compress;
924 r->weight_idx = hdr.weight_idx - 1;
926 r->case_cnt = hdr.case_cnt;
927 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
929 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
930 fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2));
933 if (r->bias != 100.0)
934 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
936 fh_get_file_name (r->fh), r->bias);
938 /* Make a file label only on the condition that the given label is
939 not all spaces or nulls. */
943 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
945 if (!c_isspace ((unsigned char) hdr.file_label[i])
946 && hdr.file_label[i] != 0)
948 char *label = xmalloc (i + 2);
949 memcpy (label, hdr.file_label, i + 1);
951 dict_set_label (dict, label);
962 memcpy (info->creation_date, hdr.creation_date, 9);
963 info->creation_date[9] = 0;
965 memcpy (info->creation_time, hdr.creation_time, 8);
966 info->creation_time[8] = 0;
968 #ifdef WORDS_BIGENDIAN
969 info->big_endian = !r->reverse_endian;
971 info->big_endian = r->reverse_endian;
974 info->compressed = hdr.compress;
976 info->case_cnt = hdr.case_cnt;
978 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
979 if (c_isgraph ((unsigned char) *cp))
981 strcpy (info->product, cp);
990 /* Reads most of the dictionary from file H; also fills in the
991 associated VAR_BY_IDX array. */
993 read_variables (struct sfm_reader *r,
994 struct dictionary *dict, struct variable ***var_by_idx)
998 struct sysfile_variable sv; /* Disk buffer. */
999 int long_string_count = 0; /* # of long string continuation
1000 records still expected. */
1001 int next_value = 0; /* Index to next `value' structure. */
1008 /* Read in the entry for each variable and use the info to
1009 initialize the dictionary. */
1012 struct variable *vv;
1013 char name[SHORT_NAME_LEN + 1];
1017 assertive_buf_read (r, &sv, sizeof sv, 0);
1019 if (r->reverse_endian)
1021 bswap_int32 (&sv.rec_type);
1022 bswap_int32 (&sv.type);
1023 bswap_int32 (&sv.has_var_label);
1024 bswap_int32 (&sv.n_missing_values);
1025 bswap_int32 (&sv.print);
1026 bswap_int32 (&sv.write);
1029 /* We've come to the end of the variable entries */
1030 if (sv.rec_type != 2)
1032 buf_unread(r, sizeof sv);
1037 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
1039 /* If there was a long string previously, make sure that the
1040 continuations are present; otherwise make sure there aren't
1042 if (long_string_count)
1045 lose ((ME, _("%s: position %d: String variable does not have "
1046 "proper number of continuation records."),
1047 fh_get_file_name (r->fh), i));
1050 (*var_by_idx)[i] = NULL;
1051 long_string_count--;
1054 else if (sv.type == -1)
1055 lose ((ME, _("%s: position %d: Superfluous long string continuation "
1057 fh_get_file_name (r->fh), i));
1059 /* Check fields for validity. */
1060 if (sv.type < 0 || sv.type > 255)
1061 lose ((ME, _("%s: position %d: Bad variable type code %d."),
1062 fh_get_file_name (r->fh), i, sv.type));
1063 if (sv.has_var_label != 0 && sv.has_var_label != 1)
1064 lose ((ME, _("%s: position %d: Variable label indicator field is not "
1065 "0 or 1."), fh_get_file_name (r->fh), i));
1066 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
1067 || sv.n_missing_values == -1)
1068 lose ((ME, _("%s: position %d: Missing value indicator field is not "
1069 "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i));
1071 /* Copy first character of variable name. */
1072 if (sv.name[0] == '@' || sv.name[0] == '#')
1073 lose ((ME, _("%s: position %d: Variable name begins with invalid "
1075 fh_get_file_name (r->fh), i));
1077 name[0] = sv.name[0];
1079 /* Copy remaining characters of variable name. */
1080 for (j = 1; j < SHORT_NAME_LEN; j++)
1082 int c = (unsigned char) sv.name[j];
1091 if ( ! var_is_plausible_name(name, false) )
1092 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
1093 fh_get_file_name (r->fh), name));
1095 /* Create variable. */
1096 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
1098 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
1099 fh_get_file_name (r->fh), name));
1101 /* Set the short name the same as the long name */
1102 var_set_short_name (vv, vv->name);
1104 /* Case reading data. */
1105 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
1106 long_string_count = nv - 1;
1109 /* Get variable label, if any. */
1110 if (sv.has_var_label == 1)
1115 /* Read length of label. */
1116 assertive_buf_read (r, &len, sizeof len, 0);
1117 if (r->reverse_endian)
1121 if (len < 0 || len > 255)
1122 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
1124 fh_get_file_name (r->fh), vv->name, len));
1128 /* Read label into variable structure. */
1129 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
1130 if (vv->label == NULL)
1132 vv->label[len] = '\0';
1136 /* Set missing values. */
1137 if (sv.n_missing_values != 0)
1140 int mv_cnt = abs (sv.n_missing_values);
1142 if (vv->width > MAX_SHORT_STRING)
1143 lose ((ME, _("%s: Long string variable %s may not have missing "
1145 fh_get_file_name (r->fh), vv->name));
1147 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
1149 if (r->reverse_endian && vv->type == NUMERIC)
1150 for (j = 0; j < mv_cnt; j++)
1151 bswap_flt64 (&mv[j]);
1153 if (sv.n_missing_values > 0)
1155 for (j = 0; j < sv.n_missing_values; j++)
1156 if (vv->type == NUMERIC)
1157 mv_add_num (&vv->miss, mv[j]);
1159 mv_add_str (&vv->miss, (char *) &mv[j]);
1163 if (vv->type == ALPHA)
1164 lose ((ME, _("%s: String variable %s may not have missing "
1165 "values specified as a range."),
1166 fh_get_file_name (r->fh), vv->name));
1168 if (mv[0] == r->lowest)
1169 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
1170 else if (mv[1] == r->highest)
1171 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
1173 mv_add_num_range (&vv->miss, mv[0], mv[1]);
1175 if (sv.n_missing_values == -3)
1176 mv_add_num (&vv->miss, mv[2]);
1180 if (!parse_format_spec (r, sv.print, &vv->print, vv)
1181 || !parse_format_spec (r, sv.write, &vv->write, vv))
1185 /* Some consistency checks. */
1186 if (long_string_count != 0)
1187 lose ((ME, _("%s: Long string continuation records omitted at end of "
1189 fh_get_file_name (r->fh)));
1191 if (next_value != r->value_cnt)
1192 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
1193 "%d were read from file."),
1194 fh_get_file_name (r->fh), r->value_cnt, next_value);
1203 /* Translates the format spec from sysfile format to internal
1206 parse_format_spec (struct sfm_reader *r, int32_t s,
1207 struct fmt_spec *f, const struct variable *v)
1209 f->type = translate_fmt ((s >> 16) & 0xff);
1211 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1212 fh_get_file_name (r->fh), (s >> 16) & 0xff));
1213 f->w = (s >> 8) & 0xff;
1216 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1217 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1218 fh_get_file_name (r->fh),
1219 v->type == ALPHA ? _("String") : _("Numeric"),
1221 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1222 formats[f->type].name));
1224 if (!check_output_specifier (f, false)
1225 || !check_specifier_width (f, v->width, false))
1227 msg (ME, _("%s variable %s has invalid format specifier %s."),
1228 v->type == NUMERIC ? _("Numeric") : _("String"),
1229 v->name, fmt_to_string (f));
1230 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1238 /* Reads value labels from sysfile H and inserts them into the
1239 associated dictionary. */
1241 read_value_labels (struct sfm_reader *r,
1242 struct dictionary *dict, struct variable **var_by_idx)
1246 char raw_value[8]; /* Value as uninterpreted bytes. */
1247 union value value; /* Value. */
1248 char *label; /* Null-terminated label string. */
1251 struct label *labels = NULL;
1252 int32_t n_labels; /* Number of labels. */
1254 struct variable **var = NULL; /* Associated variables. */
1255 int32_t n_vars; /* Number of associated variables. */
1259 /* First step: read the contents of the type 3 record and record its
1260 contents. Note that we can't do much with the data since we
1261 don't know yet whether it is of numeric or string type. */
1263 /* Read number of labels. */
1264 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1265 if (r->reverse_endian)
1266 bswap_int32 (&n_labels);
1268 if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
1270 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1271 fh_get_file_name (r->fh), n_labels);
1275 /* Allocate memory. */
1276 labels = xcalloc (n_labels, sizeof *labels);
1277 for (i = 0; i < n_labels; i++)
1278 labels[i].label = NULL;
1280 /* Read each value/label tuple into labels[]. */
1281 for (i = 0; i < n_labels; i++)
1283 struct label *label = labels + i;
1284 unsigned char label_len;
1288 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1290 /* Read label length. */
1291 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1292 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1294 /* Read label, padding. */
1295 label->label = xmalloc (padded_len + 1);
1296 assertive_buf_read (r, label->label, padded_len - 1, 0);
1297 label->label[label_len] = 0;
1300 /* Second step: Read the type 4 record that has the list of
1301 variables to which the value labels are to be applied. */
1303 /* Read record type of type 4 record. */
1307 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1308 if (r->reverse_endian)
1309 bswap_int32 (&rec_type);
1312 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1313 "follow value label record (type 3) as it should."),
1314 fh_get_file_name (r->fh)));
1317 /* Read number of variables associated with value label from type 4
1319 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1320 if (r->reverse_endian)
1321 bswap_int32 (&n_vars);
1322 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1323 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1324 "is not between 1 and the number of variables (%d)."),
1325 fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict)));
1327 /* Read the list of variables. */
1328 var = xnmalloc (n_vars, sizeof *var);
1329 for (i = 0; i < n_vars; i++)
1334 /* Read variable index, check range. */
1335 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1336 if (r->reverse_endian)
1337 bswap_int32 (&var_idx);
1338 if (var_idx < 1 || var_idx > r->value_cnt)
1339 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1340 "not between 1 and the number of values (%d)."),
1341 fh_get_file_name (r->fh), var_idx, r->value_cnt));
1343 /* Make sure it's a real variable. */
1344 v = var_by_idx[var_idx - 1];
1346 lose ((ME, _("%s: Variable index associated with value label (%d) "
1347 "refers to a continuation of a string variable, not to "
1348 "an actual variable."),
1349 fh_get_file_name (r->fh), var_idx));
1350 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1351 lose ((ME, _("%s: Value labels are not allowed on long string "
1353 fh_get_file_name (r->fh), v->name));
1355 /* Add it to the list of variables. */
1359 /* Type check the variables. */
1360 for (i = 1; i < n_vars; i++)
1361 if (var[i]->type != var[0]->type)
1362 lose ((ME, _("%s: Variables associated with value label are not all of "
1363 "identical type. Variable %s has %s type, but variable "
1365 fh_get_file_name (r->fh),
1366 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1367 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1369 /* Fill in labels[].value, now that we know the desired type. */
1370 for (i = 0; i < n_labels; i++)
1372 struct label *label = labels + i;
1374 if (var[0]->type == ALPHA)
1376 const int copy_len = min (sizeof label->raw_value,
1377 sizeof label->label);
1378 memcpy (label->value.s, label->raw_value, copy_len);
1381 assert (sizeof f == sizeof label->raw_value);
1382 memcpy (&f, label->raw_value, sizeof f);
1383 if (r->reverse_endian)
1389 /* Assign the value_label's to each variable. */
1390 for (i = 0; i < n_vars; i++)
1392 struct variable *v = var[i];
1395 /* Add each label to the variable. */
1396 for (j = 0; j < n_labels; j++)
1398 struct label *label = labels + j;
1399 if (!val_labs_replace (v->val_labs, label->value, label->label))
1402 if (var[0]->type == NUMERIC)
1403 msg (MW, _("%s: File contains duplicate label for value %g for "
1405 fh_get_file_name (r->fh), label->value.f, v->name);
1407 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1408 "for variable %s."),
1409 fh_get_file_name (r->fh), v->width, label->value.s, v->name);
1413 for (i = 0; i < n_labels; i++)
1414 free (labels[i].label);
1422 for (i = 0; i < n_labels; i++)
1423 free (labels[i].label);
1430 /* Reads BYTE_CNT bytes from the file represented by H. If BUF is
1431 non-NULL, uses that as the buffer; otherwise allocates at least
1432 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1435 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1439 if (buf == NULL && byte_cnt > 0 )
1440 buf = xmalloc (max (byte_cnt, min_alloc));
1442 if ( byte_cnt == 0 )
1446 if (1 != fread (buf, byte_cnt, 1, r->file))
1448 if (ferror (r->file))
1449 msg (ME, _("%s: Reading system file: %s."),
1450 fh_get_file_name (r->fh), strerror (errno));
1452 corrupt_msg (ME, _("%s: Unexpected end of file."),
1453 fh_get_file_name (r->fh));
1461 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1463 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1465 assert(byte_cnt > 0);
1467 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1469 msg (ME, _("%s: Seeking system file: %s."),
1470 fh_get_file_name (r->fh), strerror (errno));
1474 /* Reads a document record, type 6, from system file R, and sets up
1475 the documents and n_documents fields in the associated
1478 read_documents (struct sfm_reader *r, struct dictionary *dict)
1483 if (dict_get_documents (dict) != NULL)
1484 lose ((ME, _("%s: System file contains multiple "
1485 "type 6 (document) records."),
1486 fh_get_file_name (r->fh)));
1488 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1490 lose ((ME, _("%s: Number of document lines (%ld) "
1491 "must be greater than 0."),
1492 fh_get_file_name (r->fh), (long) line_cnt));
1494 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1495 /* FIXME? Run through asciify. */
1496 if (documents == NULL)
1498 documents[80 * line_cnt] = '\0';
1499 dict_set_documents (dict, documents);
1509 /* Reads compressed data into H->BUF and sets other pointers
1510 appropriately. Returns nonzero only if both no errors occur and
1513 buffer_input (struct sfm_reader *r)
1520 r->buf = xnmalloc (128, sizeof *r->buf);
1521 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1522 if (ferror (r->file))
1524 msg (ME, _("%s: Error reading file: %s."),
1525 fh_get_file_name (r->fh), strerror (errno));
1530 r->end = &r->buf[amt];
1534 /* Reads a single case consisting of compressed data from system
1535 file H into the array BUF[] according to reader R, and
1536 returns nonzero only if successful. */
1537 /* Data in system files is compressed in this manner. Data
1538 values are grouped into sets of eight ("octets"). Each value
1539 in an octet has one instruction byte that are output together.
1540 Each instruction byte gives a value for that byte or indicates
1541 that the value can be found following the instructions. */
1543 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1545 const unsigned char *p_end = r->x + sizeof (flt64);
1546 unsigned char *p = r->y;
1548 const flt64 *buf_beg = buf;
1549 const flt64 *buf_end = &buf[r->value_cnt];
1553 for (; p < p_end; p++){
1557 /* Code 0 is ignored. */
1560 /* Code 252 is end of file. */
1563 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1564 "in partial case."),
1565 fh_get_file_name (r->fh)));
1567 /* Code 253 indicates that the value is stored explicitly
1568 following the instruction bytes. */
1569 if (r->ptr == NULL || r->ptr >= r->end)
1570 if (!buffer_input (r))
1571 lose ((ME, _("%s: Unexpected end of file."),
1572 fh_get_file_name (r->fh)));
1573 memcpy (buf++, r->ptr++, sizeof *buf);
1578 /* Code 254 indicates a string that is all blanks. */
1579 memset (buf++, ' ', sizeof *buf);
1584 /* Code 255 indicates the system-missing value. */
1586 if (r->reverse_endian)
1593 /* Codes 1 through 251 inclusive are taken to indicate a
1594 value of (BYTE - BIAS), where BYTE is the byte's value
1595 and BIAS is the compression bias (generally 100.0). */
1596 *buf = *p - r->bias;
1597 if (r->reverse_endian)
1605 /* We have reached the end of this instruction octet. Read
1607 if (r->ptr == NULL || r->ptr >= r->end)
1609 if (!buffer_input (r))
1612 lose ((ME, _("%s: Unexpected end of file."),
1613 fh_get_file_name (r->fh)));
1618 memcpy (r->x, r->ptr++, sizeof *buf);
1625 /* We have filled up an entire record. Update state and return
1636 /* Reads one case from READER's file into C. Returns nonzero
1637 only if successful. */
1639 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1644 if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls)
1646 /* Fast path: external and internal representations are the
1647 same, except possibly for endianness or SYSMIS. Read
1648 directly into the case's buffer, then fix up any minor
1649 details as needed. */
1650 if (!fread_ok (r, case_data_all_rw (c),
1651 sizeof (union value) * r->value_cnt))
1654 /* Fix up endianness if needed. */
1655 if (r->reverse_endian)
1659 for (i = 0; i < r->var_cnt; i++)
1660 if (r->vars[i].width == 0)
1661 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1664 /* Fix up SYSMIS values if needed.
1665 I don't think this will ever actually kick in, but it
1667 if (r->sysmis != SYSMIS)
1671 for (i = 0; i < r->var_cnt; i++)
1672 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1673 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1678 /* Slow path: internal and external representations differ.
1679 Read into a bounce buffer, then copy to C. */
1686 bounce_size = sizeof *bounce * r->value_cnt;
1687 bounce = bounce_cur = local_alloc (bounce_size);
1689 memset(bounce, 0, bounce_size);
1692 read_ok = fread_ok (r, bounce, bounce_size);
1694 read_ok = read_compressed_data (r, bounce);
1697 local_free (bounce);
1701 for (i = 0; i < r->var_cnt; i++)
1703 struct sfm_var *sv = &r->vars[i];
1707 flt64 f = *bounce_cur++;
1708 if (r->reverse_endian)
1710 case_data_rw (c, sv->fv)->f = f == r->sysmis ? SYSMIS : f;
1714 flt64 *bc_start = bounce_cur;
1716 while (ofs < sv->width )
1718 const int chunk = MIN (MAX_LONG_STRING, sv->width - ofs);
1719 memcpy (case_data_rw (c, sv->fv)->s + ofs, bounce_cur, chunk);
1721 bounce_cur += DIV_RND_UP (chunk, sizeof (flt64));
1725 bounce_cur = bc_start + width_to_bytes(sv->width) / sizeof(flt64);
1729 local_free (bounce);
1735 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1737 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1739 if (read_bytes == byte_cnt)
1743 if (ferror (r->file))
1745 msg (ME, _("%s: Reading system file: %s."),
1746 fh_get_file_name (r->fh), strerror (errno));
1749 else if (read_bytes != 0)
1751 msg (ME, _("%s: Partial record at end of system file."),
1752 fh_get_file_name (r->fh));
1759 /* Returns true if an I/O error has occurred on READER, false
1762 sfm_read_error (const struct sfm_reader *reader)
1767 /* Returns true if FILE is an SPSS system file,
1770 sfm_detect (FILE *file)
1772 struct sysfile_header hdr;
1774 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1776 if (strncmp ("$FL2", hdr.rec_type, 4))