1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 #include <libpspp/alloc.h>
28 #include <libpspp/message.h>
29 #include <libpspp/compiler.h>
30 #include <libpspp/magic.h>
31 #include <libpspp/misc.h>
32 #include <libpspp/str.h>
33 #include <libpspp/hash.h>
35 #include "sys-file-reader.h"
36 #include "sfm-private.h"
38 #include "dictionary.h"
39 #include "file-handle-def.h"
40 #include "file-name.h"
42 #include "value-labels.h"
47 #define _(msgid) gettext (msgid)
49 /* System file reader. */
52 struct file_handle *fh; /* File handle. */
53 FILE *file; /* File stream. */
55 int reverse_endian; /* 1=file has endianness opposite us. */
56 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
57 int value_cnt; /* Number of `union values's per case. */
58 long case_cnt; /* Number of cases, -1 if unknown. */
59 int compressed; /* 1=compressed, 0=not compressed. */
60 double bias; /* Compression bias, usually 100.0. */
61 int weight_idx; /* 0-based index of weighting variable, or -1. */
62 bool ok; /* False after an I/O error or corrupt data. */
65 struct sfm_var *vars; /* Variables. */
67 /* File's special constants. */
72 /* Decompression buffer. */
73 flt64 *buf; /* Buffer data. */
74 flt64 *ptr; /* Current location in buffer. */
75 flt64 *end; /* End of buffer data. */
77 /* Compression instruction octet. */
78 unsigned char x[8]; /* Current instruction octet. */
79 unsigned char *y; /* Location in current instruction octet. */
82 /* A variable in a system file. */
85 int width; /* 0=numeric, otherwise string width. */
86 int fv; /* Index into case. */
91 /* Swap bytes *A and *B. */
93 bswap (char *a, char *b)
100 /* Reverse the byte order of 32-bit integer *X. */
102 bswap_int32 (int32_t *x_)
104 char *x = (char *) x_;
105 bswap (x + 0, x + 3);
106 bswap (x + 1, x + 2);
109 /* Reverse the byte order of 64-bit floating point *X. */
111 bswap_flt64 (flt64 *x_)
113 char *x = (char *) x_;
114 bswap (x + 0, x + 7);
115 bswap (x + 1, x + 6);
116 bswap (x + 2, x + 5);
117 bswap (x + 3, x + 4);
121 corrupt_msg (int class, const char *format,...)
122 PRINTF_FORMAT (2, 3);
124 /* Displays a corrupt sysfile error. */
126 corrupt_msg (int class, const char *format,...)
132 ds_create (&text, _("corrupt system file: "));
133 va_start (args, format);
134 ds_vprintf (&text, format, args);
137 m.category = msg_class_to_category (class);
138 m.severity = msg_class_to_severity (class);
139 m.where.file_name = NULL;
140 m.where.line_number = 0;
141 m.text = ds_c_str (&text);
146 /* Closes a system file after we're done with it. */
148 sfm_close_reader (struct sfm_reader *r)
155 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
156 msg (ME, _("%s: Closing system file: %s."),
157 fh_get_file_name (r->fh), strerror (errno));
162 fh_close (r->fh, "system file", "rs");
169 /* Dictionary reader. */
171 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
173 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
176 static int read_header (struct sfm_reader *,
177 struct dictionary *, struct sfm_read_info *);
178 static int parse_format_spec (struct sfm_reader *, int32_t,
179 struct fmt_spec *, const struct variable *);
180 static int read_value_labels (struct sfm_reader *, struct dictionary *,
181 struct variable **var_by_idx);
182 static int read_variables (struct sfm_reader *,
183 struct dictionary *, struct variable ***var_by_idx);
184 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
185 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
186 static int read_documents (struct sfm_reader *, struct dictionary *);
188 static int fread_ok (struct sfm_reader *, void *, size_t);
190 /* Displays the message X with corrupt_msg, then jumps to the error
198 /* Calls buf_read with the specified arguments, and jumps to
199 error if the read fails. */
200 #define assertive_buf_read(a,b,c,d) \
202 if (!buf_read (a,b,c,d)) \
214 pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED)
218 const struct name_pair *p1 = _p1;
219 const struct name_pair *p2 = _p2;
221 char buf1[SHORT_NAME_LEN + 1];
222 char buf2[SHORT_NAME_LEN + 1];
224 memset(buf1, 0, SHORT_NAME_LEN + 1);
225 memset(buf2, 0, SHORT_NAME_LEN + 1);
227 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
229 buf1[i] = p1->shortname[i];
230 if ( '\0' == buf1[i])
234 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
236 buf2[i] = p2->shortname[i];
237 if ( '\0' == buf2[i])
241 return strncmp(buf1, buf2, SHORT_NAME_LEN);
245 pair_sn_hash(const void *_p, void *aux UNUSED)
248 const struct name_pair *p = _p;
249 char buf[SHORT_NAME_LEN + 1];
251 memset(buf, 0, SHORT_NAME_LEN + 1);
252 for (i = 0 ; i <= SHORT_NAME_LEN ; ++i )
254 buf[i] = p->shortname[i];
259 return hsh_hash_bytes(buf, strlen(buf));
263 pair_sn_free(void *p, void *aux UNUSED)
269 /* Opens the system file designated by file handle FH for
270 reading. Reads the system file's dictionary into *DICT.
271 If INFO is non-null, then it receives additional info about the
274 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
275 struct sfm_read_info *info)
277 struct sfm_reader *r = NULL;
278 struct variable **var_by_idx = NULL;
280 /* The data in record 7(14) */
281 char *subrec14data = 0;
283 /* A hash table of long variable names indexed by short name */
284 struct hsh_table *short_to_long = NULL;
287 *dict = dict_create ();
288 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
291 /* Create and initialize reader. */
292 r = xmalloc (sizeof *r);
294 r->file = fn_open (fh_get_file_name (fh), "rb");
296 r->reverse_endian = 0;
307 r->sysmis = -FLT64_MAX;
308 r->highest = FLT64_MAX;
309 r->lowest = second_lowest_flt64;
311 r->buf = r->ptr = r->end = NULL;
312 r->y = r->x + sizeof r->x;
314 /* Check that file open succeeded. */
317 msg (ME, _("An error occurred while opening \"%s\" for reading "
318 "as a system file: %s."),
319 fh_get_file_name (r->fh), strerror (errno));
323 /* Read header and variables. */
324 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
328 /* Handle weighting. */
329 if (r->weight_idx != -1)
331 struct variable *weight_var;
333 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
334 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
335 "and number of elements per case (%d)."),
336 fh_get_file_name (r->fh), r->weight_idx, r->value_cnt));
339 weight_var = var_by_idx[r->weight_idx];
341 if (weight_var == NULL)
343 _("%s: Weighting variable may not be a continuation of "
344 "a long string variable."), fh_get_file_name (fh)));
345 else if (weight_var->type == ALPHA)
346 lose ((ME, _("%s: Weighting variable may not be a string variable."),
347 fh_get_file_name (fh)));
349 dict_set_weight (*dict, weight_var);
352 dict_set_weight (*dict, NULL);
354 /* Read records of types 3, 4, 6, and 7. */
359 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
360 if (r->reverse_endian)
361 bswap_int32 (&rec_type);
367 if (!read_value_labels (r, *dict, var_by_idx))
372 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
373 "records must always immediately follow type 3 "
375 fh_get_file_name (r->fh)));
378 if (!read_documents (r, *dict))
395 assertive_buf_read (r, &data, sizeof data, 0);
396 if (r->reverse_endian)
398 bswap_int32 (&data.subtype);
399 bswap_int32 (&data.size);
400 bswap_int32 (&data.count);
402 bytes = data.size * data.count;
404 if (bytes < data.size || bytes < data.count)
405 lose ((ME, "%s: Record type %d subtype %d too large.",
406 fh_get_file_name (r->fh), rec_type, data.subtype));
408 switch (data.subtype)
411 if (!read_machine_int32_info (r, data.size, data.count))
416 if (!read_machine_flt64_info (r, data.size, data.count))
421 case 6: /* ?? Used by SPSS 8.0. */
425 case 11: /* Variable display parameters */
427 const int n_vars = data.count / 3 ;
429 if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) )
431 msg (MW, _("%s: Invalid subrecord length. "
432 "Record: 7; Subrecord: 11"),
433 fh_get_file_name (r->fh));
438 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
450 assertive_buf_read (r, ¶ms, sizeof(params), 0);
452 v = dict_get_var(*dict, i);
454 v->measure = params.measure;
455 v->display_width = params.width;
456 v->alignment = params.align;
461 case 13: /* SPSS 12.0 Long variable name map */
463 char *short_name, *save_ptr;
467 subrec14data = xmalloc (bytes + 1);
468 if (!buf_read (r, subrec14data, bytes, 0))
472 subrec14data[bytes] = '\0';
474 short_to_long = hsh_create(4,
481 for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0;
483 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
485 struct name_pair *pair ;
486 char *long_name = strtok_r (NULL, "\t", &save_ptr);
489 /* Validate long name. */
490 if (long_name == NULL)
492 msg (MW, _("%s: Trailing garbage in long variable "
494 fh_get_file_name (r->fh));
497 if (!var_is_valid_name (long_name, false))
499 msg (MW, _("%s: Long variable mapping to invalid "
500 "variable name `%s'."),
501 fh_get_file_name (r->fh), long_name);
505 /* Find variable using short name. */
506 v = dict_lookup_var (*dict, short_name);
509 msg (MW, _("%s: Long variable mapping for "
510 "nonexistent variable %s."),
511 fh_get_file_name (r->fh), short_name);
515 /* Identify any duplicates. */
516 if ( compare_var_names(short_name, long_name, 0) &&
517 NULL != dict_lookup_var (*dict, long_name))
518 lose ((ME, _("%s: Duplicate long variable name `%s' "
519 "within system file."),
520 fh_get_file_name (r->fh), long_name));
524 Renaming a variable may clear the short
525 name, but we want to retain it, so
526 re-set it explicitly. */
527 dict_rename_var (*dict, v, long_name);
528 var_set_short_name (v, short_name);
530 pair = xmalloc(sizeof *pair);
531 pair->shortname = short_name;
532 pair->longname = long_name;
533 hsh_insert(short_to_long, pair);
535 /* This messes up the processing of subtype 14 (below).
536 I'm not sure if it is needed anyway, so I'm removing it for
537 now. If it's needed, then it will need to be done after all the
538 records have been processed. --- JMD 27 April 2006
541 /* For compatibility, make sure dictionary
542 is in long variable name map order. In
543 the common case, this has no effect,
544 because the dictionary and the long
545 variable name map are already in the
547 dict_reorder_var (*dict, v, idx);
557 bool eq_seen = false;
561 char *buffer = xmalloc (bytes + 1);
562 if (!buf_read (r, buffer, bytes, 0))
567 buffer[bytes] = '\0';
570 /* Note: SPSS v13 terminates this record with 00,
571 whereas SPSS v14 terminates it with 00 09. We must
573 for(i = 0; i < bytes ; ++i)
576 static char name[SHORT_NAME_LEN + 1] = {0};
577 static char len_str[6] ={0};
586 length = strtol(len_str, 0, 10);
587 if ( length != LONG_MAX && length != LONG_MIN)
589 char *lookup_name = name;
596 struct name_pair pair;
599 pair.shortname = name;
600 p = hsh_find(short_to_long, &pair);
602 lookup_name = p->longname;
605 v = dict_lookup_var(*dict, lookup_name);
609 _("%s: No variable called %s but it is listed in length table."),
610 fh_get_file_name (r->fh), lookup_name);
617 if ( v->width > EFFECTIVE_LONG_STRING_LENGTH )
618 l -= EFFECTIVE_LONG_STRING_LENGTH;
625 struct variable *v_next;
626 v_next = dict_get_var(*dict, idx + 1);
628 if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH )
629 l -= EFFECTIVE_LONG_STRING_LENGTH;
633 dict_delete_var(*dict, v_next);
637 v->print.w = v->width;
638 v->write.w = v->width;
641 memset(name, 0, SHORT_NAME_LEN+1);
642 memset(len_str, 0, 6);
649 len_str[j] = buffer[i];
661 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
662 "encountered in system file."),
663 fh_get_file_name (r->fh), data.subtype);
669 void *x = buf_read (r, NULL, data.size * data.count, 0);
681 assertive_buf_read (r, &filler, sizeof filler, 0);
687 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
688 fh_get_file_name (r->fh), rec_type);
693 /* Come here on successful completion. */
697 hsh_destroy(short_to_long);
702 /* Come here on unsuccessful completion. */
703 sfm_close_reader (r);
705 hsh_destroy(short_to_long);
709 dict_destroy (*dict);
715 /* Read record type 7, subtype 3: eight int32 values.
   Verifies the record's SIZE and COUNT fields, byte-swaps the values
   when R->reverse_endian is set, and rejects the file when its
   floating-point format, its endianness indicator (data[6]) or its
   character representation code (data[7]) is not acceptable.
   sizeof yields a size_t, so it is cast to int before being handed
   to the %d conversion in the diagnostic below. */
717 read_machine_int32_info (struct sfm_reader *r, int size, int count)
724 if (size != sizeof (int32_t) || count != 8)
725 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
726 "subtype 3. Expected size %d, count 8."),
727 fh_get_file_name (r->fh), size, count, (int) sizeof (int32_t)));
729 assertive_buf_read (r, data, sizeof data, 0);
730 if (r->reverse_endian)
731 for (i = 0; i < 8; i++)
732 bswap_int32 (&data[i]);
736 lose ((ME, _("%s: Floating-point representation in system file is not "
737 "IEEE-754. PSPP cannot convert between floating-point "
739 fh_get_file_name (r->fh)));
741 #error Add support for your floating-point format.
744 #ifdef WORDS_BIGENDIAN
749 if (r->reverse_endian)
751 if (file_bigendian ^ (data[6] == 1))
752 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
753 "endianness intuited from file header (%s)."),
754 fh_get_file_name (r->fh),
755 file_bigendian ? _("big-endian") : _("little-endian"),
756 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
759 /* PORTME: Character representation code. */
760 if (data[7] != 2 && data[7] != 3)
761 lose ((ME, _("%s: File-indicated character representation code (%s) is "
763 fh_get_file_name (r->fh),
764 (data[7] == 1 ? "EBCDIC"
765 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
773 /* Read record type 7, subtype 4: three flt64 values, compared below
   against SYSMIS, FLT64_MAX and second_lowest_flt64.
   Verifies the record's SIZE and COUNT fields (COUNT must be 3),
   byte-swaps the values when R->reverse_endian is set, and warns when
   any of them differs from the internal value.
   sizeof yields a size_t, so it is cast to int before being handed
   to the %d conversion in the diagnostic below. */
775 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
780 if (size != sizeof (flt64) || count != 3)
781 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
782 "subtype 4. Expected size %d, count 3."),
783 fh_get_file_name (r->fh), size, count, (int) sizeof (flt64)));
785 assertive_buf_read (r, data, sizeof data, 0);
786 if (r->reverse_endian)
787 for (i = 0; i < 3; i++)
788 bswap_flt64 (&data[i]);
790 if (data[0] != SYSMIS || data[1] != FLT64_MAX
791 || data[2] != second_lowest_flt64)
794 r->highest = data[1];
796 msg (MW, _("%s: File-indicated value is different from internal value "
797 "for at least one of the three system values. SYSMIS: "
798 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
800 fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS,
801 (double) data[1], (double) FLT64_MAX,
802 (double) data[2], (double) second_lowest_flt64);
812 read_header (struct sfm_reader *r,
813 struct dictionary *dict, struct sfm_read_info *info)
815 struct sysfile_header hdr; /* Disk buffer. */
816 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
817 int skip_amt = 0; /* Amount of product name to omit. */
820 /* Read header, check magic. */
821 assertive_buf_read (r, &hdr, sizeof hdr, 0);
822 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
823 lose ((ME, _("%s: Bad magic. Proper system files begin with "
824 "the four characters `$FL2'. This file will not be read."),
825 fh_get_file_name (r->fh)));
827 /* Check eye-catcher string. */
828 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
829 for (i = 0; i < 60; i++)
830 if (!c_isprint ((unsigned char) prod_name[i]))
832 for (i = 59; i >= 0; i--)
833 if (!c_isgraph ((unsigned char) prod_name[i]))
838 prod_name[60] = '\0';
842 static const char *prefix[N_PREFIXES] =
844 "@(#) SPSS DATA FILE",
850 for (i = 0; i < N_PREFIXES; i++)
851 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
853 skip_amt = strlen (prefix[i]);
858 /* Check endianness. */
859 if (hdr.layout_code == 2)
860 r->reverse_endian = 0;
863 bswap_int32 (&hdr.layout_code);
864 if (hdr.layout_code != 2)
865 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
866 "should be 2, in big-endian or little-endian format."),
867 fh_get_file_name (r->fh), hdr.layout_code));
869 r->reverse_endian = 1;
870 bswap_int32 (&hdr.nominal_case_size);
871 bswap_int32 (&hdr.compress);
872 bswap_int32 (&hdr.weight_idx);
873 bswap_int32 (&hdr.case_cnt);
874 bswap_flt64 (&hdr.bias);
878 /* Copy basic info and verify correctness. */
879 r->value_cnt = hdr.nominal_case_size;
881 /* If value count is ridiculous, then force it to -1 (a
883 if ( r->value_cnt < 0 ||
884 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
887 r->compressed = hdr.compress;
889 r->weight_idx = hdr.weight_idx - 1;
891 r->case_cnt = hdr.case_cnt;
892 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
894 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
895 fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2));
898 if (r->bias != 100.0)
899 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
901 fh_get_file_name (r->fh), r->bias);
903 /* Make a file label only on the condition that the given label is
904 not all spaces or nulls. */
908 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
910 if (!c_isspace ((unsigned char) hdr.file_label[i])
911 && hdr.file_label[i] != 0)
913 char *label = xmalloc (i + 2);
914 memcpy (label, hdr.file_label, i + 1);
916 dict_set_label (dict, label);
927 memcpy (info->creation_date, hdr.creation_date, 9);
928 info->creation_date[9] = 0;
930 memcpy (info->creation_time, hdr.creation_time, 8);
931 info->creation_time[8] = 0;
933 #ifdef WORDS_BIGENDIAN
934 info->big_endian = !r->reverse_endian;
936 info->big_endian = r->reverse_endian;
939 info->compressed = hdr.compress;
941 info->case_cnt = hdr.case_cnt;
943 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
944 if (c_isgraph ((unsigned char) *cp))
946 strcpy (info->product, cp);
955 /* Reads most of the dictionary from the system file read by R; also fills in the
956 associated VAR_BY_IDX array. */
958 read_variables (struct sfm_reader *r,
959 struct dictionary *dict, struct variable ***var_by_idx)
963 struct sysfile_variable sv; /* Disk buffer. */
964 int long_string_count = 0; /* # of long string continuation
965 records still expected. */
966 int next_value = 0; /* Index to next `value' structure. */
972 /* Pre-allocate variables. */
973 if (r->value_cnt != -1)
975 *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
976 r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
980 /* Read in the entry for each variable and use the info to
981 initialize the dictionary. */
985 char name[SHORT_NAME_LEN + 1];
989 assertive_buf_read (r, &sv, sizeof sv, 0);
991 if (r->reverse_endian)
993 bswap_int32 (&sv.rec_type);
994 bswap_int32 (&sv.type);
995 bswap_int32 (&sv.has_var_label);
996 bswap_int32 (&sv.n_missing_values);
997 bswap_int32 (&sv.print);
998 bswap_int32 (&sv.write);
1001 /* We've come to the end of the variable entries */
1002 if (sv.rec_type != 2)
1004 buf_unread(r, sizeof sv);
1009 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
1010 r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
1012 /* If there was a long string previously, make sure that the
1013 continuations are present; otherwise make sure there aren't
1015 if (long_string_count)
1018 lose ((ME, _("%s: position %d: String variable does not have "
1019 "proper number of continuation records."),
1020 fh_get_file_name (r->fh), i));
1023 r->vars[i].width = -1;
1024 (*var_by_idx)[i] = NULL;
1025 long_string_count--;
1028 else if (sv.type == -1)
1029 lose ((ME, _("%s: position %d: Superfluous long string continuation "
1031 fh_get_file_name (r->fh), i));
1033 /* Check fields for validity. */
1034 if (sv.type < 0 || sv.type > 255)
1035 lose ((ME, _("%s: position %d: Bad variable type code %d."),
1036 fh_get_file_name (r->fh), i, sv.type));
1037 if (sv.has_var_label != 0 && sv.has_var_label != 1)
1038 lose ((ME, _("%s: position %d: Variable label indicator field is not "
1039 "0 or 1."), fh_get_file_name (r->fh), i));
1040 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
1041 || sv.n_missing_values == -1)
1042 lose ((ME, _("%s: position %d: Missing value indicator field is not "
1043 "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i));
1045 /* Copy first character of variable name. */
1046 if (sv.name[0] == '@' || sv.name[0] == '#')
1047 lose ((ME, _("%s: position %d: Variable name begins with invalid "
1049 fh_get_file_name (r->fh), i));
1051 name[0] = sv.name[0];
1053 /* Copy remaining characters of variable name. */
1054 for (j = 1; j < SHORT_NAME_LEN; j++)
1056 int c = (unsigned char) sv.name[j];
1065 if ( ! var_is_plausible_name(name, false) )
1066 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
1067 fh_get_file_name (r->fh), name));
1069 /* Create variable. */
1070 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
1072 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
1073 fh_get_file_name (r->fh), name));
1075 /* Set the short name the same as the long name */
1076 var_set_short_name (vv, vv->name);
1078 /* Case reading data. */
1079 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
1080 long_string_count = nv - 1;
1083 /* Get variable label, if any. */
1084 if (sv.has_var_label == 1)
1089 /* Read length of label. */
1090 assertive_buf_read (r, &len, sizeof len, 0);
1091 if (r->reverse_endian)
1095 if (len < 0 || len > 255)
1096 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
1098 fh_get_file_name (r->fh), vv->name, len));
1102 /* Read label into variable structure. */
1103 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1);
1104 if (vv->label == NULL)
1106 vv->label[len] = '\0';
1110 /* Set missing values. */
1111 if (sv.n_missing_values != 0)
1114 int mv_cnt = abs (sv.n_missing_values);
1116 if (vv->width > MAX_SHORT_STRING)
1117 lose ((ME, _("%s: Long string variable %s may not have missing "
1119 fh_get_file_name (r->fh), vv->name));
1121 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
1123 if (r->reverse_endian && vv->type == NUMERIC)
1124 for (j = 0; j < mv_cnt; j++)
1125 bswap_flt64 (&mv[j]);
1127 if (sv.n_missing_values > 0)
1129 for (j = 0; j < sv.n_missing_values; j++)
1130 if (vv->type == NUMERIC)
1131 mv_add_num (&vv->miss, mv[j]);
1133 mv_add_str (&vv->miss, (char *) &mv[j]);
1137 if (vv->type == ALPHA)
1138 lose ((ME, _("%s: String variable %s may not have missing "
1139 "values specified as a range."),
1140 fh_get_file_name (r->fh), vv->name));
1142 if (mv[0] == r->lowest)
1143 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
1144 else if (mv[1] == r->highest)
1145 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
1147 mv_add_num_range (&vv->miss, mv[0], mv[1]);
1149 if (sv.n_missing_values == -3)
1150 mv_add_num (&vv->miss, mv[2]);
1154 if (!parse_format_spec (r, sv.print, &vv->print, vv)
1155 || !parse_format_spec (r, sv.write, &vv->write, vv))
1158 r->vars[i].width = vv->width;
1159 r->vars[i].fv = vv->fv;
1163 /* Some consistency checks. */
1164 if (long_string_count != 0)
1165 lose ((ME, _("%s: Long string continuation records omitted at end of "
1167 fh_get_file_name (r->fh)));
1169 if (next_value != r->value_cnt)
1170 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
1171 "%d were read from file."),
1172 fh_get_file_name (r->fh), r->value_cnt, next_value);
1181 /* Translates the format spec from sysfile format to internal
1184 parse_format_spec (struct sfm_reader *r, int32_t s,
1185 struct fmt_spec *f, const struct variable *v)
1187 f->type = translate_fmt ((s >> 16) & 0xff);
1189 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1190 fh_get_file_name (r->fh), (s >> 16) & 0xff));
1191 f->w = (s >> 8) & 0xff;
1194 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1195 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1196 fh_get_file_name (r->fh),
1197 v->type == ALPHA ? _("String") : _("Numeric"),
1199 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1200 formats[f->type].name));
1202 if (!check_output_specifier (f, false)
1203 || !check_specifier_width (f, v->width, false))
1205 msg (ME, _("%s variable %s has invalid format specifier %s."),
1206 v->type == NUMERIC ? _("Numeric") : _("String"),
1207 v->name, fmt_to_string (f));
1208 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1216 /* Reads value labels from the sysfile read by R and inserts them into the
1217 associated dictionary. */
1219 read_value_labels (struct sfm_reader *r,
1220 struct dictionary *dict, struct variable **var_by_idx)
1224 char raw_value[8]; /* Value as uninterpreted bytes. */
1225 union value value; /* Value. */
1226 char *label; /* Null-terminated label string. */
1229 struct label *labels = NULL;
1230 int32_t n_labels; /* Number of labels. */
1232 struct variable **var = NULL; /* Associated variables. */
1233 int32_t n_vars; /* Number of associated variables. */
1237 /* First step: read the contents of the type 3 record and record its
1238 contents. Note that we can't do much with the data since we
1239 don't know yet whether it is of numeric or string type. */
1241 /* Read number of labels. */
1242 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1243 if (r->reverse_endian)
1244 bswap_int32 (&n_labels);
1246 if ( n_labels >= ((int32_t) ~0) / sizeof *labels)
1248 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1249 fh_get_file_name (r->fh), n_labels);
1253 /* Allocate memory. */
1254 labels = xcalloc (n_labels, sizeof *labels);
1255 for (i = 0; i < n_labels; i++)
1256 labels[i].label = NULL;
1258 /* Read each value/label tuple into labels[]. */
1259 for (i = 0; i < n_labels; i++)
1261 struct label *label = labels + i;
1262 unsigned char label_len;
1266 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1268 /* Read label length. */
1269 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1270 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1272 /* Read label, padding. */
1273 label->label = xmalloc (padded_len + 1);
1274 assertive_buf_read (r, label->label, padded_len - 1, 0);
1275 label->label[label_len] = 0;
1278 /* Second step: Read the type 4 record that has the list of
1279 variables to which the value labels are to be applied. */
1281 /* Read record type of type 4 record. */
1285 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1286 if (r->reverse_endian)
1287 bswap_int32 (&rec_type);
1290 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1291 "follow value label record (type 3) as it should."),
1292 fh_get_file_name (r->fh)));
1295 /* Read number of variables associated with value label from type 4
1297 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1298 if (r->reverse_endian)
1299 bswap_int32 (&n_vars);
1300 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1301 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1302 "is not between 1 and the number of variables (%d)."),
1303 fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict)));
1305 /* Read the list of variables. */
1306 var = xnmalloc (n_vars, sizeof *var);
1307 for (i = 0; i < n_vars; i++)
1312 /* Read variable index, check range. */
1313 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1314 if (r->reverse_endian)
1315 bswap_int32 (&var_idx);
1316 if (var_idx < 1 || var_idx > r->value_cnt)
1317 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1318 "not between 1 and the number of values (%d)."),
1319 fh_get_file_name (r->fh), var_idx, r->value_cnt));
1321 /* Make sure it's a real variable. */
1322 v = var_by_idx[var_idx - 1];
1324 lose ((ME, _("%s: Variable index associated with value label (%d) "
1325 "refers to a continuation of a string variable, not to "
1326 "an actual variable."),
1327 fh_get_file_name (r->fh), var_idx));
1328 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1329 lose ((ME, _("%s: Value labels are not allowed on long string "
1331 fh_get_file_name (r->fh), v->name));
1333 /* Add it to the list of variables. */
1337 /* Type check the variables. */
1338 for (i = 1; i < n_vars; i++)
1339 if (var[i]->type != var[0]->type)
1340 lose ((ME, _("%s: Variables associated with value label are not all of "
1341 "identical type. Variable %s has %s type, but variable "
1343 fh_get_file_name (r->fh),
1344 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1345 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1347 /* Fill in labels[].value, now that we know the desired type. */
1348 for (i = 0; i < n_labels; i++)
1350 struct label *label = labels + i;
1352 if (var[0]->type == ALPHA)
1354 const int copy_len = min (sizeof label->raw_value,
1355 sizeof label->label);
1356 memcpy (label->value.s, label->raw_value, copy_len);
1359 assert (sizeof f == sizeof label->raw_value);
1360 memcpy (&f, label->raw_value, sizeof f);
1361 if (r->reverse_endian)
1367 /* Assign the value_label's to each variable. */
1368 for (i = 0; i < n_vars; i++)
1370 struct variable *v = var[i];
1373 /* Add each label to the variable. */
1374 for (j = 0; j < n_labels; j++)
1376 struct label *label = labels + j;
1377 if (!val_labs_replace (v->val_labs, label->value, label->label))
1380 if (var[0]->type == NUMERIC)
1381 msg (MW, _("%s: File contains duplicate label for value %g for "
1383 fh_get_file_name (r->fh), label->value.f, v->name);
1385 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1386 "for variable %s."),
1387 fh_get_file_name (r->fh), v->width, label->value.s, v->name);
1391 for (i = 0; i < n_labels; i++)
1392 free (labels[i].label);
1400 for (i = 0; i < n_labels; i++)
1401 free (labels[i].label);
1408 /* Reads BYTE_CNT bytes from the file represented by R. If BUF is
1409 non-NULL, uses that as the buffer; otherwise allocates at least
1410 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1413 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1417 if (buf == NULL && byte_cnt > 0 )
1418 buf = xmalloc (max (byte_cnt, min_alloc));
1420 if ( byte_cnt == 0 )
1424 if (1 != fread (buf, byte_cnt, 1, r->file))
1426 if (ferror (r->file))
1427 msg (ME, _("%s: Reading system file: %s."),
1428 fh_get_file_name (r->fh), strerror (errno));
1430 corrupt_msg (ME, _("%s: Unexpected end of file."),
1431 fh_get_file_name (r->fh));
1439 /* Winds the reader BYTE_CNT bytes back in the reader stream.
   BYTE_CNT must be positive.  It is converted to long before being
   negated: negating the unsigned size_t directly yields a huge
   positive value whose conversion to fseek's long offset is
   implementation-defined.  A failed seek is reported with msg. */
1441 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1443 assert(byte_cnt > 0);
1445 if ( 0 != fseek(r->file, -(long) byte_cnt, SEEK_CUR))
1447 msg (ME, _("%s: Seeking system file: %s."),
1448 fh_get_file_name (r->fh), strerror (errno));
1452 /* Reads a document record, type 6, from system file R, and sets up
1453 the documents and n_documents fields in the associated
1456 read_documents (struct sfm_reader *r, struct dictionary *dict)
1461 if (dict_get_documents (dict) != NULL)
1462 lose ((ME, _("%s: System file contains multiple "
1463 "type 6 (document) records."),
1464 fh_get_file_name (r->fh)));
1466 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
  /* The line count is stored in the file's byte order.  Swap it when
     the file's endianness is opposite ours, as is done for the other
     int32 fields read from the file; otherwise a valid count is
     misread as a huge or negative number. */
  if (r->reverse_endian)
    bswap_int32 (&line_cnt);
1468 lose ((ME, _("%s: Number of document lines (%ld) "
1469 "must be greater than 0."),
1470 fh_get_file_name (r->fh), (long) line_cnt));
1472 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1473 /* FIXME? Run through asciify. */
1474 if (documents == NULL)
1476 documents[80 * line_cnt] = '\0';
1477 dict_set_documents (dict, documents);
/* buffer_input (R): refills R's decompression buffer from R's
   file.

   The steps visible in this block:
   - Allocates R->buf with room for 128 elements.  NOTE(review):
     the condition guarding the allocation is not visible here;
     presumably it runs only while R->buf is still null --
     confirm.
   - Reads up to 128 elements from R->file into R->buf; AMT is the
     number of whole elements actually read.
   - Reports "Error reading file" if the stream's error indicator
     is set.
   - Points R->end just past the last element read, so that
     R->end == R->buf when nothing was read.

   The comment below calls the reader H; the parameter is named
   R. */
1487 /* Reads compressed data into H->BUF and sets other pointers
1488 appropriately. Returns nonzero only if both no errors occur and
1491 buffer_input (struct sfm_reader *r)
1498 r->buf = xnmalloc (128, sizeof *r->buf);
1499 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1500 if (ferror (r->file))
1502 msg (ME, _("%s: Error reading file: %s."),
1503 fh_get_file_name (r->fh), strerror (errno));
1508 r->end = &r->buf[amt];
1512 /* Reads a single case consisting of compressed data from system
1513 file R into the array BUF[], and
1514 returns nonzero only if successful. */
1515 /* Data in system files is compressed in this manner. Data
1516 values are grouped into sets of eight ("octets"). Each value
1517 in an octet has one instruction byte; these bytes are output together.
1518 Each instruction byte either encodes its value directly or indicates
1519 that the value can be found following the instructions. */
/* Local state visible in this block:
   - P walks the instruction bytes of the current octet.  It
     starts at R->y and stops at P_END, which is sizeof (flt64)
     bytes past R->x.
   - BUF is the output position.  BUF_BEG and BUF_END mark the
     start and end of the R->value_cnt elements of one case.
   - R->ptr is the read position in the decompression buffer and
     R->end its end; buffer_input() refills it.
   NOTE(review): the dispatch on the instruction byte and the
   return statements are not visible in this block, so the
   per-code comments below are the only description of which code
   runs for which byte value.  Likewise, any check of BUF against
   BUF_END is not visible -- confirm that every store through BUF
   is bounded. */
1521 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1523 const unsigned char *p_end = r->x + sizeof (flt64);
1524 unsigned char *p = r->y;
1526 const flt64 *buf_beg = buf;
1527 const flt64 *buf_end = &buf[r->value_cnt];
1531 for (; p < p_end; p++){
1535 /* Code 0 is ignored. */
1538 /* Code 252 is end of file. */
1541 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1542 "in partial case."),
1543 fh_get_file_name (r->fh)));
1545 /* Code 253 indicates that the value is stored explicitly
1546 following the instruction bytes. */
/* Refill the decompression buffer if it is empty or used up,
   then copy one element to the output and step past it in both
   buffers. */
1547 if (r->ptr == NULL || r->ptr >= r->end)
1548 if (!buffer_input (r))
1549 lose ((ME, _("%s: Unexpected end of file."),
1550 fh_get_file_name (r->fh)));
1551 memcpy (buf++, r->ptr++, sizeof *buf);
1556 /* Code 254 indicates a string that is all blanks. */
1557 memset (buf++, ' ', sizeof *buf);
1562 /* Code 255 indicates the system-missing value. */
1564 if (r->reverse_endian)
1571 /* Codes 1 through 251 inclusive are taken to indicate a
1572 value of (BYTE - BIAS), where BYTE is the byte's value
1573 and BIAS is the compression bias (generally 100.0). */
1574 *buf = *p - r->bias;
1575 if (r->reverse_endian)
/* The refill that follows mirrors the one for code 253, except
   that the element fetched is copied into R->x, where it serves
   as the next octet of instruction bytes. */
1583 /* We have reached the end of this instruction octet. Read
1585 if (r->ptr == NULL || r->ptr >= r->end)
1587 if (!buffer_input (r))
1590 lose ((ME, _("%s: Unexpected end of file."),
1591 fh_get_file_name (r->fh)));
1596 memcpy (r->x, r->ptr++, sizeof *buf);
1603 /* We have filled up an entire record. Update state and return
1614 /* Reads one case from R's file into C. Returns nonzero
1615 only if successful. */
/* Two strategies are visible in this block:
   - Fast path, taken for uncompressed data when flt64 and double
     have the same size: the case's own storage receives the file
     data directly.  Values of width 0 (accessed through their
     `f' member) are then byte-swapped if the file's endianness
     is reversed, and translated if the file's SYSMIS differs
     from ours.
   - Slow path: the data goes through a temporary buffer of
     R->value_cnt flt64 elements, filled either by fread_ok() or
     by read_compressed_data(), and is then copied into C one
     variable at a time.
   NOTE(review): the fast path reads sizeof (union value) bytes
   per value, which presumes that a union value is the size of
   one flt64 -- confirm.
   NOTE(review): the SYSMIS fix-up tests case_num (c, i) but
   stores through R->vars[i].fv.  The two agree only when fv == i
   for every variable of width 0 -- confirm. */
1617 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1622 if (!r->compressed && sizeof (flt64) == sizeof (double))
1624 /* Fast path: external and internal representations are the
1625 same, except possibly for endianness or SYSMIS. Read
1626 directly into the case's buffer, then fix up any minor
1627 details as needed. */
1628 if (!fread_ok (r, case_data_all_rw (c),
1629 sizeof (union value) * r->value_cnt))
1632 /* Fix up endianness if needed. */
1633 if (r->reverse_endian)
1637 for (i = 0; i < r->value_cnt; i++)
1638 if (r->vars[i].width == 0)
1639 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1642 /* Fix up SYSMIS values if needed.
1643 I don't think this will ever actually kick in, but it
1645 if (r->sysmis != SYSMIS)
1649 for (i = 0; i < r->value_cnt; i++)
1650 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1651 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1656 /* Slow path: internal and external representations differ.
1657 Read into a bounce buffer, then copy to C. */
/* The bounce buffer holds one flt64 for each value in the case.
   NOTE(review): the test that chooses between the two readers
   below, and the test guarding the first local_free(), are not
   visible here; presumably the choice follows R->compressed and
   the first local_free() runs only when the read failed --
   confirm. */
1664 bounce_size = sizeof *bounce * r->value_cnt;
1665 bounce = bounce_cur = local_alloc (bounce_size);
1668 read_ok = fread_ok (r, bounce, bounce_size);
1670 read_ok = read_compressed_data (r, bounce);
1673 local_free (bounce);
/* Copy each variable out of the bounce buffer.  BOUNCE_CUR is
   the next element not yet consumed.  A numeric value takes one
   element and is stored as SYSMIS when it equals the file's
   system-missing value. */
1677 for (i = 0; i < r->value_cnt; i++)
1679 struct sfm_var *v = &r->vars[i];
1683 flt64 f = *bounce_cur++;
1684 if (r->reverse_endian)
1686 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1688 else if (v->width != -1)
/* A string contributes V->width bytes and consumes as many whole
   flt64 elements as are needed to hold them.  Entries of width
   -1 reach neither branch, so nothing is copied for them and
   BOUNCE_CUR does not move; presumably they stand for the later
   segments of a long string -- confirm. */
1690 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1691 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1695 local_free (bounce);
/* fread_ok (R, BUFFER, BYTE_CNT): reads BYTE_CNT bytes from R's
   file into BUFFER and classifies the outcome.

   The cases visible in this block:
   - All BYTE_CNT bytes were read.
   - Fewer bytes were read and the stream's error indicator is
     set: reports "Reading system file" with the errno text.
   - Fewer bytes, but more than zero, were read without a stream
     error: reports a partial record at end of file.
   - Zero bytes were read without a stream error: no message is
     issued.  Presumably this is an ordinary end of file --
     confirm.

   NOTE(review): the return statements and any update of R->ok
   are not visible here.  Callers in this file treat a nonzero
   result as success. */
1701 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
/* The item size is 1, so fread()'s result is a byte count. */
1703 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1705 if (read_bytes == byte_cnt)
1709 if (ferror (r->file))
1711 msg (ME, _("%s: Reading system file: %s."),
1712 fh_get_file_name (r->fh), strerror (errno));
1715 else if (read_bytes != 0)
1717 msg (ME, _("%s: Partial record at end of system file."),
1718 fh_get_file_name (r->fh));
/* sfm_read_error (READER): error query for a system file reader.
   NOTE(review): the body is not visible in this block.
   Presumably the result is derived from READER->ok, which struct
   sfm_reader documents as "False after an I/O error or corrupt
   data" -- confirm. */
1725 /* Returns true if an I/O error has occurred on READER, false
1728 sfm_read_error (const struct sfm_reader *reader)
1733 /* Returns true if FILE is an SPSS system file,
1736 sfm_detect (FILE *file)
1738 struct sysfile_header hdr;
1740 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1742 if (strncmp ("$FL2", hdr.rec_type, 4))