1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "dictionary.h"
33 #include "file-handle.h"
40 #include "value-labels.h"
45 #define _(msgid) gettext (msgid)
47 #include "debug-print.h"
49 /* System file reader. */
52 struct file_handle *fh; /* File handle. */
53 FILE *file; /* File stream. */
55 int reverse_endian; /* 1=file has endianness opposite us. */
56 int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */
57 int value_cnt; /* Number of `union values's per case. */
58 long case_cnt; /* Number of cases, -1 if unknown. */
59 int compressed; /* 1=compressed, 0=not compressed. */
60 double bias; /* Compression bias, usually 100.0. */
61 int weight_idx; /* 0-based index of weighting variable, or -1. */
64 struct sfm_var *vars; /* Variables. */
66 /* File's special constants. */
71 /* Decompression buffer. */
72 flt64 *buf; /* Buffer data. */
73 flt64 *ptr; /* Current location in buffer. */
74 flt64 *end; /* End of buffer data. */
76 /* Compression instruction octet. */
77 unsigned char x[8]; /* Current instruction octet. */
78 unsigned char *y; /* Location in current instruction octet. */
81 /* A variable in a system file. */
84 int width; /* 0=numeric, otherwise string width. */
85 int fv; /* Index into case. */
90 /* Swap bytes *A and *B. */
92 bswap (char *a, char *b)
99 /* Reverse the byte order of 32-bit integer *X. */
101 bswap_int32 (int32 *x_)
103 char *x = (char *) x_;
104 bswap (x + 0, x + 3);
105 bswap (x + 1, x + 2);
108 /* Reverse the byte order of 64-bit floating point *X. */
110 bswap_flt64 (flt64 *x_)
112 char *x = (char *) x_;
113 bswap (x + 0, x + 7);
114 bswap (x + 1, x + 6);
115 bswap (x + 2, x + 5);
116 bswap (x + 3, x + 4);
120 corrupt_msg (int class, const char *format,...)
121 PRINTF_FORMAT (2, 3);
123 /* Displays a corrupt sysfile error. */
125 corrupt_msg (int class, const char *format,...)
131 getl_location (&e.where.filename, &e.where.line_number);
132 e.title = _("corrupt system file: ");
134 va_start (args, format);
135 err_vmsg (&e, format, args);
139 /* Closes a system file after we're done with it. */
141 sfm_close_reader (struct sfm_reader *r)
148 if (fn_close (fh_get_filename (r->fh), r->file) == EOF)
149 msg (ME, _("%s: Closing system file: %s."),
150 fh_get_filename (r->fh), strerror (errno));
155 fh_close (r->fh, "system file", "rs");
162 /* Dictionary reader. */
164 static void buf_unread(struct sfm_reader *r, size_t byte_cnt);
166 static void *buf_read (struct sfm_reader *, void *buf, size_t byte_cnt,
169 static int read_header (struct sfm_reader *,
170 struct dictionary *, struct sfm_read_info *);
171 static int parse_format_spec (struct sfm_reader *, int32,
172 struct fmt_spec *, struct variable *);
173 static int read_value_labels (struct sfm_reader *, struct dictionary *,
174 struct variable **var_by_idx);
175 static int read_variables (struct sfm_reader *,
176 struct dictionary *, struct variable ***var_by_idx);
177 static int read_machine_int32_info (struct sfm_reader *, int size, int count);
178 static int read_machine_flt64_info (struct sfm_reader *, int size, int count);
179 static int read_documents (struct sfm_reader *, struct dictionary *);
181 static int fread_ok (struct sfm_reader *, void *, size_t);
183 /* Displays the message X with corrupt_msg, then jumps to the error
191 /* Calls buf_read with the specified arguments, and jumps to
192 error if the read fails. */
193 #define assertive_buf_read(a,b,c,d) \
195 if (!buf_read (a,b,c,d)) \
199 /* Opens the system file designated by file handle FH for
200 reading. Reads the system file's dictionary into *DICT.
201 If INFO is non-null, then it receives additional info about the
204 sfm_open_reader (struct file_handle *fh, struct dictionary **dict,
205 struct sfm_read_info *info)
207 struct sfm_reader *r = NULL;
208 struct variable **var_by_idx = NULL;
210 *dict = dict_create ();
211 if (!fh_open (fh, FH_REF_FILE, "system file", "rs"))
214 /* Create and initialize reader. */
215 r = xmalloc (sizeof *r);
217 r->file = fn_open (fh_get_filename (fh), "rb");
219 r->reverse_endian = 0;
229 r->sysmis = -FLT64_MAX;
230 r->highest = FLT64_MAX;
231 r->lowest = second_lowest_flt64;
233 r->buf = r->ptr = r->end = NULL;
234 r->y = r->x + sizeof r->x;
236 /* Check that file open succeeded. */
239 msg (ME, _("An error occurred while opening \"%s\" for reading "
240 "as a system file: %s."),
241 fh_get_filename (r->fh), strerror (errno));
246 /* Read header and variables. */
247 if (!read_header (r, *dict, info) || !read_variables (r, *dict, &var_by_idx))
251 /* Handle weighting. */
252 if (r->weight_idx != -1)
254 struct variable *weight_var;
256 if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt)
257 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
258 "and number of elements per case (%d)."),
259 fh_get_filename (r->fh), r->weight_idx, r->value_cnt));
262 weight_var = var_by_idx[r->weight_idx];
264 if (weight_var == NULL)
266 _("%s: Weighting variable may not be a continuation of "
267 "a long string variable."), fh_get_filename (fh)));
268 else if (weight_var->type == ALPHA)
269 lose ((ME, _("%s: Weighting variable may not be a string variable."),
270 fh_get_filename (fh)));
272 dict_set_weight (*dict, weight_var);
275 dict_set_weight (*dict, NULL);
277 /* Read records of types 3, 4, 6, and 7. */
282 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
283 if (r->reverse_endian)
284 bswap_int32 (&rec_type);
289 if (!read_value_labels (r, *dict, var_by_idx))
294 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
295 "records must always immediately follow type 3 "
297 fh_get_filename (r->fh)));
300 if (!read_documents (r, *dict))
317 assertive_buf_read (r, &data, sizeof data, 0);
318 if (r->reverse_endian)
320 bswap_int32 (&data.subtype);
321 bswap_int32 (&data.size);
322 bswap_int32 (&data.count);
324 bytes = data.size * data.count;
325 if (bytes < data.size || bytes < data.count)
326 lose ((ME, "%s: Record type %d subtype %d too large.",
327 fh_get_filename (r->fh), rec_type, data.subtype));
329 switch (data.subtype)
332 if (!read_machine_int32_info (r, data.size, data.count))
337 if (!read_machine_flt64_info (r, data.size, data.count))
342 case 6: /* ?? Used by SPSS 8.0. */
346 case 11: /* Variable display parameters */
348 const int n_vars = data.count / 3 ;
350 if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) )
352 msg (MW, _("%s: Invalid subrecord length. "
353 "Record: 7; Subrecord: 11"),
354 fh_get_filename (r->fh));
358 for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i )
370 assertive_buf_read (r, ¶ms, sizeof(params), 0);
372 v = dict_get_var(*dict, i);
374 v->measure = params.measure;
375 v->display_width = params.width;
376 v->alignment = params.align;
381 case 13: /* SPSS 12.0 Long variable name map */
383 char *buf, *short_name, *save_ptr;
387 buf = xmalloc (bytes + 1);
388 if (!buf_read (r, buf, bytes, 0))
396 for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0;
398 short_name = strtok_r (NULL, "=", &save_ptr), idx++)
400 char *long_name = strtok_r (NULL, "\t", &save_ptr);
403 /* Validate long name. */
404 if (long_name == NULL)
406 msg (MW, _("%s: Trailing garbage in long variable "
408 fh_get_filename (r->fh));
411 if (!var_is_valid_name (long_name, false))
413 msg (MW, _("%s: Long variable mapping to invalid "
414 "variable name `%s'."),
415 fh_get_filename (r->fh), long_name);
419 /* Find variable using short name. */
420 v = dict_lookup_var (*dict, short_name);
423 msg (MW, _("%s: Long variable mapping for "
424 "nonexistent variable %s."),
425 fh_get_filename (r->fh), short_name);
429 /* Identify any duplicates. */
430 if ( compare_var_names(short_name, long_name, 0) &&
431 NULL != dict_lookup_var (*dict, long_name))
433 lose ((ME, _("%s: Duplicate long variable name `%s' "
434 "within system file."),
435 fh_get_filename (r->fh), long_name));
440 Renaming a variable may clear the short
441 name, but we want to retain it, so
442 re-set it explicitly. */
443 dict_rename_var (*dict, v, long_name);
444 var_set_short_name (v, short_name);
446 /* For compatibility, make sure dictionary
447 is in long variable name map order. In
448 the common case, this has no effect,
449 because the dictionary and the long
450 variable name map are already in the
452 dict_reorder_var (*dict, v, idx);
461 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
462 "encountered in system file."),
463 fh_get_filename (r->fh), data.subtype);
469 void *x = buf_read (r, NULL, data.size * data.count, 0);
481 assertive_buf_read (r, &filler, sizeof filler, 0);
486 corrupt_msg(MW, _("%s: Unrecognized record type %d."),
487 fh_get_filename (r->fh), rec_type);
492 /* Come here on successful completion. */
497 /* Come here on unsuccessful completion. */
498 sfm_close_reader (r);
502 dict_destroy (*dict);
508 /* Read record type 7, subtype 3. */
510 read_machine_int32_info (struct sfm_reader *r, int size, int count)
517 if (size != sizeof (int32) || count != 8)
518 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
519 "subtype 3. Expected size %d, count 8."),
520 fh_get_filename (r->fh), size, count, sizeof (int32)));
522 assertive_buf_read (r, data, sizeof data, 0);
523 if (r->reverse_endian)
524 for (i = 0; i < 8; i++)
525 bswap_int32 (&data[i]);
529 lose ((ME, _("%s: Floating-point representation in system file is not "
530 "IEEE-754. PSPP cannot convert between floating-point "
532 fh_get_filename (r->fh)));
534 #error Add support for your floating-point format.
537 #ifdef WORDS_BIGENDIAN
542 if (r->reverse_endian)
544 if (file_bigendian ^ (data[6] == 1))
545 lose ((ME, _("%s: File-indicated endianness (%s) does not match "
546 "endianness intuited from file header (%s)."),
547 fh_get_filename (r->fh),
548 file_bigendian ? _("big-endian") : _("little-endian"),
549 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
552 /* PORTME: Character representation code. */
553 if (data[7] != 2 && data[7] != 3)
554 lose ((ME, _("%s: File-indicated character representation code (%s) is "
556 fh_get_filename (r->fh),
557 (data[7] == 1 ? "EBCDIC"
558 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
566 /* Read record type 7, subtype 4. */
568 read_machine_flt64_info (struct sfm_reader *r, int size, int count)
573 if (size != sizeof (flt64) || count != 3)
574 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
575 "subtype 4. Expected size %d, count 8."),
576 fh_get_filename (r->fh), size, count, sizeof (flt64)));
578 assertive_buf_read (r, data, sizeof data, 0);
579 if (r->reverse_endian)
580 for (i = 0; i < 3; i++)
581 bswap_flt64 (&data[i]);
583 if (data[0] != SYSMIS || data[1] != FLT64_MAX
584 || data[2] != second_lowest_flt64)
587 r->highest = data[1];
589 msg (MW, _("%s: File-indicated value is different from internal value "
590 "for at least one of the three system values. SYSMIS: "
591 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
593 fh_get_filename (r->fh), (double) data[0], (double) SYSMIS,
594 (double) data[1], (double) FLT64_MAX,
595 (double) data[2], (double) second_lowest_flt64);
605 read_header (struct sfm_reader *r,
606 struct dictionary *dict, struct sfm_read_info *info)
608 struct sysfile_header hdr; /* Disk buffer. */
609 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
610 int skip_amt = 0; /* Amount of product name to omit. */
613 /* Read header, check magic. */
614 assertive_buf_read (r, &hdr, sizeof hdr, 0);
615 if (strncmp ("$FL2", hdr.rec_type, 4) != 0)
616 lose ((ME, _("%s: Bad magic. Proper system files begin with "
617 "the four characters `$FL2'. This file will not be read."),
618 fh_get_filename (r->fh)));
620 /* Check eye-catcher string. */
621 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
622 for (i = 0; i < 60; i++)
623 if (!isprint ((unsigned char) prod_name[i]))
625 for (i = 59; i >= 0; i--)
626 if (!isgraph ((unsigned char) prod_name[i]))
631 prod_name[60] = '\0';
635 static const char *prefix[N_PREFIXES] =
637 "@(#) SPSS DATA FILE",
643 for (i = 0; i < N_PREFIXES; i++)
644 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
646 skip_amt = strlen (prefix[i]);
651 /* Check endianness. */
652 if (hdr.layout_code == 2)
653 r->reverse_endian = 0;
656 bswap_int32 (&hdr.layout_code);
657 if (hdr.layout_code != 2)
658 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
659 "should be 2, in big-endian or little-endian format."),
660 fh_get_filename (r->fh), hdr.layout_code));
662 r->reverse_endian = 1;
663 bswap_int32 (&hdr.case_size);
664 bswap_int32 (&hdr.compress);
665 bswap_int32 (&hdr.weight_idx);
666 bswap_int32 (&hdr.case_cnt);
667 bswap_flt64 (&hdr.bias);
671 /* Copy basic info and verify correctness. */
672 r->value_cnt = hdr.case_size;
674 /* If value count is ridiculous, then force it to -1 (a sentinel value) */
675 if ( r->value_cnt < 0 ||
676 r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2))
679 r->compressed = hdr.compress;
681 r->weight_idx = hdr.weight_idx - 1;
683 r->case_cnt = hdr.case_cnt;
684 if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2)
686 _("%s: Number of cases in file (%ld) is not between -1 and %d."),
687 fh_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2));
690 if (r->bias != 100.0)
691 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
693 fh_get_filename (r->fh), r->bias);
695 /* Make a file label only on the condition that the given label is
696 not all spaces or nulls. */
700 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
701 if (!isspace ((unsigned char) hdr.file_label[i])
702 && hdr.file_label[i] != 0)
704 char *label = xmalloc (i + 2);
705 memcpy (label, hdr.file_label, i + 1);
707 dict_set_label (dict, label);
717 memcpy (info->creation_date, hdr.creation_date, 9);
718 info->creation_date[9] = 0;
720 memcpy (info->creation_time, hdr.creation_time, 8);
721 info->creation_time[8] = 0;
723 #ifdef WORDS_BIGENDIAN
724 info->big_endian = !r->reverse_endian;
726 info->big_endian = r->reverse_endian;
729 info->compressed = hdr.compress;
731 info->case_cnt = hdr.case_cnt;
733 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
734 if (isgraph ((unsigned char) *cp))
736 strcpy (info->product, cp);
745 /* Reads most of the dictionary from reader R's file; also fills in the
746 associated VAR_BY_IDX array. */
748 read_variables (struct sfm_reader *r,
749 struct dictionary *dict, struct variable ***var_by_idx)
753 struct sysfile_variable sv; /* Disk buffer. */
754 int long_string_count = 0; /* # of long string continuation
755 records still expected. */
756 int next_value = 0; /* Index to next `value' structure. */
762 /* Pre-allocate variables. */
763 if (r->value_cnt != -1)
765 *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx);
766 r->vars = xnmalloc (r->value_cnt, sizeof *r->vars);
770 /* Read in the entry for each variable and use the info to
771 initialize the dictionary. */
775 char name[SHORT_NAME_LEN + 1];
779 if ( r->value_cnt != -1 && i >= r->value_cnt )
782 assertive_buf_read (r, &sv, sizeof sv, 0);
784 if (r->reverse_endian)
786 bswap_int32 (&sv.rec_type);
787 bswap_int32 (&sv.type);
788 bswap_int32 (&sv.has_var_label);
789 bswap_int32 (&sv.n_missing_values);
790 bswap_int32 (&sv.print);
791 bswap_int32 (&sv.write);
794 /* We've come to the end of the variable entries */
795 if (sv.rec_type != 2)
797 buf_unread(r, sizeof sv);
802 if ( -1 == r->value_cnt )
804 *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx);
805 r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars);
808 /* If there was a long string previously, make sure that the
809 continuations are present; otherwise make sure there aren't
811 if (long_string_count)
814 lose ((ME, _("%s: position %d: String variable does not have "
815 "proper number of continuation records."),
816 fh_get_filename (r->fh), i));
819 r->vars[i].width = -1;
820 (*var_by_idx)[i] = NULL;
824 else if (sv.type == -1)
825 lose ((ME, _("%s: position %d: Superfluous long string continuation "
827 fh_get_filename (r->fh), i));
829 /* Check fields for validity. */
830 if (sv.type < 0 || sv.type > 255)
831 lose ((ME, _("%s: position %d: Bad variable type code %d."),
832 fh_get_filename (r->fh), i, sv.type));
833 if (sv.has_var_label != 0 && sv.has_var_label != 1)
834 lose ((ME, _("%s: position %d: Variable label indicator field is not "
835 "0 or 1."), fh_get_filename (r->fh), i));
836 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
837 || sv.n_missing_values == -1)
838 lose ((ME, _("%s: position %d: Missing value indicator field is not "
839 "-3, -2, 0, 1, 2, or 3."), fh_get_filename (r->fh), i));
841 /* Copy first character of variable name. */
842 if (!isalpha ((unsigned char) sv.name[0])
843 && sv.name[0] != '@' && sv.name[0] != '#')
844 lose ((ME, _("%s: position %d: Variable name begins with invalid "
846 fh_get_filename (r->fh), i));
847 if (islower ((unsigned char) sv.name[0]))
848 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
850 fh_get_filename (r->fh), i, sv.name[0]);
851 if (sv.name[0] == '#')
852 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
853 "(`#'). Scratch variables should not appear in system "
855 fh_get_filename (r->fh), i);
856 name[0] = toupper ((unsigned char) (sv.name[0]));
858 /* Copy remaining characters of variable name. */
859 for (j = 1; j < SHORT_NAME_LEN; j++)
861 int c = (unsigned char) sv.name[j];
865 else if (islower (c))
867 msg (MW, _("%s: position %d: Variable name character %d is "
868 "lowercase letter %c."),
869 fh_get_filename (r->fh), i, j + 1, sv.name[j]);
870 name[j] = toupper ((unsigned char) (c));
872 else if (isalnum (c) || c == '.' || c == '@'
873 || c == '#' || c == '$' || c == '_')
876 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
878 fh_get_filename (r->fh), i, c, c));
882 if ( ! var_is_valid_name(name, false) )
883 lose ((ME, _("%s: Invalid variable name `%s' within system file."),
884 fh_get_filename (r->fh), name));
886 /* Create variable. */
888 vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type);
890 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
891 fh_get_filename (r->fh), name));
893 var_set_short_name (vv, vv->name);
895 /* Case reading data. */
896 nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64));
897 long_string_count = nv - 1;
900 /* Get variable label, if any. */
901 if (sv.has_var_label == 1)
906 /* Read length of label. */
907 assertive_buf_read (r, &len, sizeof len, 0);
908 if (r->reverse_endian)
912 if (len < 0 || len > 255)
913 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
915 fh_get_filename (r->fh), vv->name, len));
919 /* Read label into variable structure. */
920 vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
921 if (vv->label == NULL)
923 vv->label[len] = '\0';
927 /* Set missing values. */
928 if (sv.n_missing_values != 0)
931 int mv_cnt = abs (sv.n_missing_values);
933 if (vv->width > MAX_SHORT_STRING)
934 lose ((ME, _("%s: Long string variable %s may not have missing "
936 fh_get_filename (r->fh), vv->name));
938 assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0);
940 if (r->reverse_endian && vv->type == NUMERIC)
941 for (j = 0; j < mv_cnt; j++)
942 bswap_flt64 (&mv[j]);
944 if (sv.n_missing_values > 0)
946 for (j = 0; j < sv.n_missing_values; j++)
947 if (vv->type == NUMERIC)
948 mv_add_num (&vv->miss, mv[j]);
950 mv_add_str (&vv->miss, (char *) &mv[j]);
954 if (vv->type == ALPHA)
955 lose ((ME, _("%s: String variable %s may not have missing "
956 "values specified as a range."),
957 fh_get_filename (r->fh), vv->name));
959 if (mv[0] == r->lowest)
960 mv_add_num_range (&vv->miss, LOWEST, mv[1]);
961 else if (mv[1] == r->highest)
962 mv_add_num_range (&vv->miss, mv[0], HIGHEST);
964 mv_add_num_range (&vv->miss, mv[0], mv[1]);
966 if (sv.n_missing_values == -3)
967 mv_add_num (&vv->miss, mv[2]);
971 if (!parse_format_spec (r, sv.print, &vv->print, vv)
972 || !parse_format_spec (r, sv.write, &vv->write, vv))
975 r->vars[i].width = vv->width;
976 r->vars[i].fv = vv->fv;
980 /* Some consistency checks. */
981 if (long_string_count != 0)
982 lose ((ME, _("%s: Long string continuation records omitted at end of "
984 fh_get_filename (r->fh)));
986 if (next_value != r->value_cnt)
987 corrupt_msg(MW, _("%s: System file header indicates %d variable positions but "
988 "%d were read from file."),
989 fh_get_filename (r->fh), r->value_cnt, next_value);
998 /* Translates the format spec from sysfile format to internal
1001 parse_format_spec (struct sfm_reader *r, int32 s,
1002 struct fmt_spec *f, struct variable *v)
1004 f->type = translate_fmt ((s >> 16) & 0xff);
1006 lose ((ME, _("%s: Bad format specifier byte (%d)."),
1007 fh_get_filename (r->fh), (s >> 16) & 0xff));
1008 f->w = (s >> 8) & 0xff;
1011 if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0))
1012 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1013 fh_get_filename (r->fh),
1014 v->type == ALPHA ? _("String") : _("Numeric"),
1016 formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1017 formats[f->type].name));
1019 if (!check_output_specifier (f, false)
1020 || !check_specifier_width (f, v->width, false))
1022 msg (ME, _("%s variable %s has invalid format specifier %s."),
1023 v->type == NUMERIC ? _("Numeric") : _("String"),
1024 v->name, fmt_to_string (f));
1025 *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0);
1033 /* Reads value labels from reader R's system file and inserts them into the
1034 associated dictionary. */
1036 read_value_labels (struct sfm_reader *r,
1037 struct dictionary *dict, struct variable **var_by_idx)
1041 char raw_value[8]; /* Value as uninterpreted bytes. */
1042 union value value; /* Value. */
1043 char *label; /* Null-terminated label string. */
1046 struct label *labels = NULL;
1047 int32 n_labels; /* Number of labels. */
1049 struct variable **var = NULL; /* Associated variables. */
1050 int32 n_vars; /* Number of associated variables. */
1054 /* First step: read the contents of the type 3 record and record its
1055 contents. Note that we can't do much with the data since we
1056 don't know yet whether it is of numeric or string type. */
1058 /* Read number of labels. */
1059 assertive_buf_read (r, &n_labels, sizeof n_labels, 0);
1060 if (r->reverse_endian)
1061 bswap_int32 (&n_labels);
1063 if ( n_labels >= ((int32) ~0) / sizeof *labels)
1065 corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."),
1066 fh_get_filename (r->fh), n_labels);
1070 /* Allocate memory. */
1071 labels = xcalloc (n_labels, sizeof *labels);
1072 for (i = 0; i < n_labels; i++)
1073 labels[i].label = NULL;
1075 /* Read each value/label tuple into labels[]. */
1076 for (i = 0; i < n_labels; i++)
1078 struct label *label = labels + i;
1079 unsigned char label_len;
1083 assertive_buf_read (r, label->raw_value, sizeof label->raw_value, 0);
1085 /* Read label length. */
1086 assertive_buf_read (r, &label_len, sizeof label_len, 0);
1087 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
1089 /* Read label, padding. */
1090 label->label = xmalloc (padded_len + 1);
1091 assertive_buf_read (r, label->label, padded_len - 1, 0);
1092 label->label[label_len] = 0;
1095 /* Second step: Read the type 4 record that has the list of
1096 variables to which the value labels are to be applied. */
1098 /* Read record type of type 4 record. */
1102 assertive_buf_read (r, &rec_type, sizeof rec_type, 0);
1103 if (r->reverse_endian)
1104 bswap_int32 (&rec_type);
1107 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1108 "follow value label record (type 3) as it should."),
1109 fh_get_filename (r->fh)));
1112 /* Read number of variables associated with value label from type 4
1114 assertive_buf_read (r, &n_vars, sizeof n_vars, 0);
1115 if (r->reverse_endian)
1116 bswap_int32 (&n_vars);
1117 if (n_vars < 1 || n_vars > dict_get_var_cnt (dict))
1118 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1119 "is not between 1 and the number of variables (%d)."),
1120 fh_get_filename (r->fh), n_vars, dict_get_var_cnt (dict)));
1122 /* Read the list of variables. */
1123 var = xnmalloc (n_vars, sizeof *var);
1124 for (i = 0; i < n_vars; i++)
1129 /* Read variable index, check range. */
1130 assertive_buf_read (r, &var_idx, sizeof var_idx, 0);
1131 if (r->reverse_endian)
1132 bswap_int32 (&var_idx);
1133 if (var_idx < 1 || var_idx > r->value_cnt)
1134 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1135 "not between 1 and the number of values (%d)."),
1136 fh_get_filename (r->fh), var_idx, r->value_cnt));
1138 /* Make sure it's a real variable. */
1139 v = var_by_idx[var_idx - 1];
1141 lose ((ME, _("%s: Variable index associated with value label (%d) "
1142 "refers to a continuation of a string variable, not to "
1143 "an actual variable."),
1144 fh_get_filename (r->fh), var_idx));
1145 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1146 lose ((ME, _("%s: Value labels are not allowed on long string "
1148 fh_get_filename (r->fh), v->name));
1150 /* Add it to the list of variables. */
1154 /* Type check the variables. */
1155 for (i = 1; i < n_vars; i++)
1156 if (var[i]->type != var[0]->type)
1157 lose ((ME, _("%s: Variables associated with value label are not all of "
1158 "identical type. Variable %s has %s type, but variable "
1160 fh_get_filename (r->fh),
1161 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1162 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1164 /* Fill in labels[].value, now that we know the desired type. */
1165 for (i = 0; i < n_labels; i++)
1167 struct label *label = labels + i;
1169 if (var[0]->type == ALPHA)
1171 const int copy_len = min (sizeof label->raw_value,
1172 sizeof label->label);
1173 memcpy (label->value.s, label->raw_value, copy_len);
1176 assert (sizeof f == sizeof label->raw_value);
1177 memcpy (&f, label->raw_value, sizeof f);
1178 if (r->reverse_endian)
1184 /* Assign the value_label's to each variable. */
1185 for (i = 0; i < n_vars; i++)
1187 struct variable *v = var[i];
1190 /* Add each label to the variable. */
1191 for (j = 0; j < n_labels; j++)
1193 struct label *label = labels + j;
1194 if (!val_labs_replace (v->val_labs, label->value, label->label))
1197 if (var[0]->type == NUMERIC)
1198 msg (MW, _("%s: File contains duplicate label for value %g for "
1200 fh_get_filename (r->fh), label->value.f, v->name);
1202 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1203 "for variable %s."),
1204 fh_get_filename (r->fh), v->width, label->value.s, v->name);
1208 for (i = 0; i < n_labels; i++)
1209 free (labels[i].label);
1217 for (i = 0; i < n_labels; i++)
1218 free (labels[i].label);
1225 /* Reads BYTE_CNT bytes from the file represented by R. If BUF is
1226 non-NULL, uses that as the buffer; otherwise allocates at least
1227 MIN_ALLOC bytes. Returns a pointer to the buffer on success, NULL
1230 buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc)
1234 if (buf == NULL && byte_cnt > 0 )
1235 buf = xmalloc (max (byte_cnt, min_alloc));
1237 if ( byte_cnt == 0 )
1241 if (1 != fread (buf, byte_cnt, 1, r->file))
1243 if (ferror (r->file))
1244 msg (ME, _("%s: Reading system file: %s."),
1245 fh_get_filename (r->fh), strerror (errno));
1247 corrupt_msg (ME, _("%s: Unexpected end of file."),
1248 fh_get_filename (r->fh));
1254 /* Winds the reader BYTE_CNT bytes back in the reader stream. */
1256 buf_unread(struct sfm_reader *r, size_t byte_cnt)
1258 assert(byte_cnt > 0);
1260 if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR))
1262 msg (ME, _("%s: Seeking system file: %s."),
1263 fh_get_filename (r->fh), strerror (errno));
1267 /* Reads a document record, type 6, from system file R, and sets up
1268 the documents and n_documents fields in the associated
1271 read_documents (struct sfm_reader *r, struct dictionary *dict)
1276 if (dict_get_documents (dict) != NULL)
1277 lose ((ME, _("%s: System file contains multiple "
1278 "type 6 (document) records."),
1279 fh_get_filename (r->fh)));
1281 assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0);
1283 lose ((ME, _("%s: Number of document lines (%ld) "
1284 "must be greater than 0."),
1285 fh_get_filename (r->fh), (long) line_cnt));
1287 documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1);
1288 /* FIXME? Run through asciify. */
1289 if (documents == NULL)
1291 documents[80 * line_cnt] = '\0';
1292 dict_set_documents (dict, documents);
1302 /* Reads compressed data into R->BUF and sets other pointers
1303 appropriately. Returns nonzero only if both no errors occur and
1306 buffer_input (struct sfm_reader *r)
1311 r->buf = xnmalloc (128, sizeof *r->buf);
1312 amt = fread (r->buf, sizeof *r->buf, 128, r->file);
1313 if (ferror (r->file))
1315 msg (ME, _("%s: Error reading file: %s."),
1316 fh_get_filename (r->fh), strerror (errno));
1320 r->end = &r->buf[amt];
1324 /* Reads a single case consisting of compressed data from
1325 reader R's system file into the array BUF[], and
1326 returns nonzero only if successful. */
1327 /* Data in system files is compressed in this manner. Data
1328 values are grouped into sets of eight ("octets"). Each value
1329 in an octet has one instruction byte, and these bytes are output together.
1330 Each instruction byte gives a value for that byte or indicates
1331 that the value can be found following the instructions. */
1333 read_compressed_data (struct sfm_reader *r, flt64 *buf)
1335 const unsigned char *p_end = r->x + sizeof (flt64);
1336 unsigned char *p = r->y;
1338 const flt64 *buf_beg = buf;
1339 const flt64 *buf_end = &buf[r->value_cnt];
1343 for (; p < p_end; p++){
1347 /* Code 0 is ignored. */
1350 /* Code 252 is end of file. */
1352 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1353 "in partial case."),
1354 fh_get_filename (r->fh)));
1357 /* Code 253 indicates that the value is stored explicitly
1358 following the instruction bytes. */
1359 if (r->ptr == NULL || r->ptr >= r->end)
1360 if (!buffer_input (r))
1362 lose ((ME, _("%s: Unexpected end of file."),
1363 fh_get_filename (r->fh)));
1366 memcpy (buf++, r->ptr++, sizeof *buf);
1371 /* Code 254 indicates a string that is all blanks. */
1372 memset (buf++, ' ', sizeof *buf);
1377 /* Code 255 indicates the system-missing value. */
1379 if (r->reverse_endian)
1386 /* Codes 1 through 251 inclusive are taken to indicate a
1387 value of (BYTE - BIAS), where BYTE is the byte's value
1388 and BIAS is the compression bias (generally 100.0). */
1389 *buf = *p - r->bias;
1390 if (r->reverse_endian)
1398 /* We have reached the end of this instruction octet. Read
1400 if (r->ptr == NULL || r->ptr >= r->end)
1401 if (!buffer_input (r))
1404 lose ((ME, _("%s: Unexpected end of file."),
1405 fh_get_filename (r->fh)));
1408 memcpy (r->x, r->ptr++, sizeof *buf);
1416 /* We have filled up an entire record. Update state and return
1422 /* We have been unsuccessful at filling a record, either through i/o
1423 error or through an end-of-file indication. Update state and
1424 return unsuccessfully. */
1428 /* Reads one case from reader R's file into C. Returns nonzero
1429 only if successful. */
1431 sfm_read_case (struct sfm_reader *r, struct ccase *c)
1433 if (!r->compressed && sizeof (flt64) == sizeof (double))
1435 /* Fast path: external and internal representations are the
1436 same, except possibly for endianness or SYSMIS. Read
1437 directly into the case's buffer, then fix up any minor
1438 details as needed. */
1439 if (!fread_ok (r, case_data_all_rw (c),
1440 sizeof (union value) * r->value_cnt))
1443 /* Fix up endianness if needed. */
1444 if (r->reverse_endian)
1448 for (i = 0; i < r->value_cnt; i++)
1449 if (r->vars[i].width == 0)
1450 bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f);
1453 /* Fix up SYSMIS values if needed.
1454 I don't think this will ever actually kick in, but it
1456 if (r->sysmis != SYSMIS)
1460 for (i = 0; i < r->value_cnt; i++)
1461 if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis)
1462 case_data_rw (c, r->vars[i].fv)->f = SYSMIS;
1467 /* Slow path: internal and external representations differ.
1468 Read into a bounce buffer, then copy to C. */
1475 bounce_size = sizeof *bounce * r->value_cnt;
1476 bounce = bounce_cur = local_alloc (bounce_size);
1479 read_ok = fread_ok (r, bounce, bounce_size);
1481 read_ok = read_compressed_data (r, bounce);
1484 local_free (bounce);
1488 for (i = 0; i < r->value_cnt; i++)
1490 struct sfm_var *v = &r->vars[i];
1494 flt64 f = *bounce_cur++;
1495 if (r->reverse_endian)
1497 case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f;
1499 else if (v->width != -1)
1501 memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width);
1502 bounce_cur += DIV_RND_UP (v->width, sizeof (flt64));
1506 local_free (bounce);
1512 fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt)
1514 size_t read_bytes = fread (buffer, 1, byte_cnt, r->file);
1516 if (read_bytes == byte_cnt)
1520 if (ferror (r->file))
1521 msg (ME, _("%s: Reading system file: %s."),
1522 fh_get_filename (r->fh), strerror (errno));
1523 else if (read_bytes != 0)
1524 msg (ME, _("%s: Partial record at end of system file."),
1525 fh_get_filename (r->fh));
1530 /* Returns true if FILE is an SPSS system file,
1533 sfm_detect (FILE *file)
1535 struct sysfile_header hdr;
1537 if (fread (&hdr, sizeof hdr, 1, file) != 1)
1539 if (strncmp ("$FL2", hdr.rec_type, 4))