1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
30 #include "file-handle.h"
37 #include "value-labels.h"
41 #include "debug-print.h"
43 /* PORTME: This file may require substantial revision for those
44 systems that don't meet the typical 32-bit integer/64-bit double
45 model. It's kinda hard to tell without having one of them on my
48 /* sfm's file_handle extension. */
51 FILE *file; /* Actual file. */
52 int opened; /* Reference count. */
54 struct dictionary *dict; /* File's dictionary. */
56 int reverse_endian; /* 1=file has endianness opposite us. */
57 int case_size; /* Number of `values's per case. */
58 long ncases; /* Number of cases, -1 if unknown. */
59 int compressed; /* 1=compressed, 0=not compressed. */
60 double bias; /* Compression bias, usually 100.0. */
61 int weight_index; /* 0-based index of weighting variable, or -1. */
63 /* File's special constants. */
68 /* Uncompression buffer. */
69 flt64 *buf; /* Buffer data. */
70 flt64 *ptr; /* Current location in buffer. */
71 flt64 *end; /* End of buffer data. */
73 /* Compression instruction octet. */
74 unsigned char x[sizeof (flt64)];
75 /* Current instruction octet. */
76 unsigned char *y; /* Location in current instruction octet. */
79 static struct fh_ext_class sfm_r_class;
82 void dump_dictionary (struct dictionary * dict);
87 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
89 bswap_int32 (int32 *x)
91 unsigned char *y = (unsigned char *) x;
103 /* Reverse the byte order of 64-bit floating point *X. */
105 bswap_flt64 (flt64 *x)
107 unsigned char *y = (unsigned char *) x;
128 corrupt_msg (int class, const char *format,...)
129 PRINTF_FORMAT (2, 3);
131 /* Displays a corrupt sysfile error. */
133 corrupt_msg (int class, const char *format,...)
140 va_start (args, format);
141 vsnprintf (buf, 1024, format, args);
149 getl_location (&e.where.filename, &e.where.line_number);
150 e.title = _("corrupt system file: ");
157 /* Closes a system file after we're done with it. */
159 sfm_close (struct file_handle *h)
161 struct sfm_fhuser_ext *ext = h->ext;
164 assert (ext->opened == 0);
165 if (EOF == fn_close (handle_get_filename (h), ext->file))
166 msg (ME, _("%s: Closing system file: %s."),
167 handle_get_filename (h), strerror (errno));
172 /* Closes a system file if we're done with it. */
174 sfm_maybe_close (struct file_handle *h)
176 struct sfm_fhuser_ext *ext = h->ext;
178 if (ext->opened == 1)
184 /* Dictionary reader. */
186 static void *bufread (struct file_handle * handle, void *buf, size_t nbytes,
189 static int read_header (struct file_handle * h, struct sfm_read_info * inf);
190 static int parse_format_spec (struct file_handle * h, int32 s,
191 struct fmt_spec * v, struct variable *vv);
192 static int read_value_labels (struct file_handle * h, struct variable ** var_by_index);
193 static int read_variables (struct file_handle * h, struct variable *** var_by_index);
194 static int read_machine_int32_info (struct file_handle * h, int size, int count);
195 static int read_machine_flt64_info (struct file_handle * h, int size, int count);
196 static int read_documents (struct file_handle * h);
198 /* Displays the message X with corrupt_msg, then jumps to the lossage
208 /* Calls bufread with the specified arguments, and jumps to lossage if
210 #define assertive_bufread(a,b,c,d) \
213 if (!bufread (a,b,c,d)) \
218 /* Reads the dictionary from file with handle H, and returns it in a
219 dictionary structure. This dictionary may be modified in order to
220 rename, reorder, and delete variables, etc. */
222 sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf)
224 /* The file handle extension record. */
225 struct sfm_fhuser_ext *ext;
227 /* Allows for quick reference to variables according to indexes
228 relative to position within a case. */
229 struct variable **var_by_index = NULL;
231 /* Check whether the file is already open. */
232 if (h->class == &sfm_r_class)
238 else if (h->class != NULL)
240 msg (ME, _("Cannot read file %s as system file: already opened for %s."),
241 handle_get_name (h), h->class->name);
245 msg (VM (1), _("%s: Opening system-file handle %s for reading."),
246 handle_get_filename (h), handle_get_name (h));
248 /* Open the physical disk file. */
249 ext = xmalloc (sizeof (struct sfm_fhuser_ext));
250 ext->file = fn_open (handle_get_filename (h), "rb");
251 if (ext->file == NULL)
253 msg (ME, _("An error occurred while opening \"%s\" for reading "
254 "as a system file: %s."),
255 handle_get_filename (h), strerror (errno));
261 /* Initialize the sfm_fhuser_ext structure. */
262 h->class = &sfm_r_class;
265 ext->buf = ext->ptr = ext->end = NULL;
266 ext->y = ext->x + sizeof ext->x;
269 /* Default special constants. */
270 ext->sysmis = -FLT64_MAX;
271 ext->highest = FLT64_MAX;
272 ext->lowest = second_lowest_flt64;
274 /* Read the header. */
275 if (!read_header (h, inf))
278 /* Read about the variables. */
279 if (!read_variables (h, &var_by_index))
282 /* Handle weighting. */
283 if (ext->weight_index != -1)
285 struct variable *wv = var_by_index[ext->weight_index];
288 lose ((ME, _("%s: Weighting variable may not be a continuation of "
289 "a long string variable."), handle_get_filename (h)));
290 else if (wv->type == ALPHA)
291 lose ((ME, _("%s: Weighting variable may not be a string variable."),
292 handle_get_filename (h)));
294 dict_set_weight (ext->dict, wv);
297 dict_set_weight (ext->dict, NULL);
299 /* Read records of types 3, 4, 6, and 7. */
304 assertive_bufread (h, &rec_type, sizeof rec_type, 0);
305 if (ext->reverse_endian)
306 bswap_int32 (&rec_type);
311 if (!read_value_labels (h, var_by_index))
316 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
317 "records must always immediately follow type 3 "
319 handle_get_filename (h)));
322 if (!read_documents (h))
338 assertive_bufread (h, &data, sizeof data, 0);
339 if (ext->reverse_endian)
341 bswap_int32 (&data.subtype);
342 bswap_int32 (&data.size);
343 bswap_int32 (&data.count);
346 switch (data.subtype)
349 if (!read_machine_int32_info (h, data.size, data.count))
354 if (!read_machine_flt64_info (h, data.size, data.count))
360 case 11: /* ?? Used by SPSS 8.0. */
365 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
366 "encountered in system file."),
367 handle_get_filename (h), data.subtype);
373 void *x = bufread (h, NULL, data.size * data.count, 0);
385 assertive_bufread (h, &filler, sizeof filler, 0);
386 goto break_out_of_loop;
390 lose ((ME, _("%s: Unrecognized record type %d."),
391 handle_get_filename (h), rec_type));
396 /* Come here on successful completion. */
397 msg (VM (2), _("Read system-file dictionary successfully."));
400 dump_dictionary (ext->dict);
406 /* Come here on unsuccessful completion. */
407 msg (VM (1), _("Error reading system-file header."));
410 fn_close (handle_get_filename (h), ext->file);
411 if (ext && ext->dict)
412 dict_destroy (ext->dict);
419 /* Read record type 7, subtype 3. */
421 read_machine_int32_info (struct file_handle * h, int size, int count)
423 struct sfm_fhuser_ext *ext = h->ext;
430 if (size != sizeof (int32) || count != 8)
431 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
432 "subtype 3. Expected size %d, count 8."),
433 handle_get_filename (h), size, count, sizeof (int32)));
435 assertive_bufread (h, data, sizeof data, 0);
436 if (ext->reverse_endian)
437 for (i = 0; i < 8; i++)
438 bswap_int32 (&data[i]);
440 /* PORTME: Check floating-point representation. */
443 lose ((ME, _("%s: Floating-point representation in system file is not "
444 "IEEE-754. PSPP cannot convert between floating-point "
446 handle_get_filename (h)));
449 /* PORTME: Check recorded file endianness against intuited file
451 #ifdef WORDS_BIGENDIAN
456 if (ext->reverse_endian)
458 if (file_bigendian ^ (data[6] == 1))
459 lose ((ME, _("%s: File-indicated endianness (%s) does not match endianness "
460 "intuited from file header (%s)."),
461 handle_get_filename (h),
462 file_bigendian ? _("big-endian") : _("little-endian"),
463 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
466 /* PORTME: Character representation code. */
467 if (data[7] != 2 && data[7] != 3)
468 lose ((ME, _("%s: File-indicated character representation code (%s) is not "
470 handle_get_filename (h),
471 (data[7] == 1 ? "EBCDIC"
472 : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))));
480 /* Read record type 7, subtype 4. */
482 read_machine_flt64_info (struct file_handle * h, int size, int count)
484 struct sfm_fhuser_ext *ext = h->ext;
490 if (size != sizeof (flt64) || count != 3)
491 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
492 "subtype 4. Expected size %d, count 8."),
493 handle_get_filename (h), size, count, sizeof (flt64)));
495 assertive_bufread (h, data, sizeof data, 0);
496 if (ext->reverse_endian)
497 for (i = 0; i < 3; i++)
498 bswap_flt64 (&data[i]);
500 if (data[0] != SYSMIS || data[1] != FLT64_MAX
501 || data[2] != second_lowest_flt64)
503 ext->sysmis = data[0];
504 ext->highest = data[1];
505 ext->lowest = data[2];
506 msg (MW, _("%s: File-indicated value is different from internal value "
507 "for at least one of the three system values. SYSMIS: "
508 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
510 handle_get_filename (h), (double) data[0], (double) SYSMIS,
511 (double) data[1], (double) FLT64_MAX,
512 (double) data[2], (double) second_lowest_flt64);
522 read_header (struct file_handle * h, struct sfm_read_info * inf)
524 struct sfm_fhuser_ext *ext = h->ext; /* File extension strcut. */
525 struct sysfile_header hdr; /* Disk buffer. */
526 struct dictionary *dict; /* File dictionary. */
527 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
528 int skip_amt = 0; /* Amount of product name to omit. */
531 /* Create the dictionary. */
532 dict = ext->dict = dict_create ();
534 /* Read header, check magic. */
535 assertive_bufread (h, &hdr, sizeof hdr, 0);
536 if (0 != strncmp ("$FL2", hdr.rec_type, 4))
537 lose ((ME, _("%s: Bad magic. Proper system files begin with "
538 "the four characters `$FL2'. This file will not be read."),
539 handle_get_filename (h)));
541 /* Check eye-catcher string. */
542 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
543 for (i = 0; i < 60; i++)
544 if (!isprint ((unsigned char) prod_name[i]))
546 for (i = 59; i >= 0; i--)
547 if (!isgraph ((unsigned char) prod_name[i]))
552 prod_name[60] = '\0';
556 static const char *prefix[N_PREFIXES] =
558 "@(#) SPSS DATA FILE",
564 for (i = 0; i < N_PREFIXES; i++)
565 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
567 skip_amt = strlen (prefix[i]);
572 /* Check endianness. */
573 /* PORTME: endianness. */
574 if (hdr.layout_code == 2)
575 ext->reverse_endian = 0;
578 bswap_int32 (&hdr.layout_code);
579 if (hdr.layout_code != 2)
580 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
581 "should be 2, in big-endian or little-endian format."),
582 handle_get_filename (h), hdr.layout_code));
584 ext->reverse_endian = 1;
585 bswap_int32 (&hdr.case_size);
586 bswap_int32 (&hdr.compressed);
587 bswap_int32 (&hdr.weight_index);
588 bswap_int32 (&hdr.ncases);
589 bswap_flt64 (&hdr.bias);
592 /* Copy basic info and verify correctness. */
593 ext->case_size = hdr.case_size;
594 if (hdr.case_size <= 0 || ext->case_size > (INT_MAX
595 / (int) sizeof (union value) / 2))
596 lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
598 handle_get_filename (h), hdr.case_size,
599 INT_MAX / sizeof (union value) / 2));
601 ext->compressed = hdr.compressed;
603 ext->weight_index = hdr.weight_index - 1;
604 if (hdr.weight_index < 0 || hdr.weight_index > hdr.case_size)
605 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
606 "and number of elements per case (%d)."),
607 handle_get_filename (h), hdr.weight_index, ext->case_size));
609 ext->ncases = hdr.ncases;
610 if (ext->ncases < -1 || ext->ncases > INT_MAX / 2)
611 lose ((ME, _("%s: Number of cases in file (%ld) is not between -1 and "
612 "%d."), handle_get_filename (h), (long) ext->ncases, INT_MAX / 2));
614 ext->bias = hdr.bias;
615 if (ext->bias != 100.0)
616 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
618 handle_get_filename (h), ext->bias);
620 /* Make a file label only on the condition that the given label is
621 not all spaces or nulls. */
625 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
626 if (!isspace ((unsigned char) hdr.file_label[i])
627 && hdr.file_label[i] != 0)
629 char *label = xmalloc (i + 2);
630 memcpy (label, hdr.file_label, i + 1);
632 dict_set_label (dict, label);
642 memcpy (inf->creation_date, hdr.creation_date, 9);
643 inf->creation_date[9] = 0;
645 memcpy (inf->creation_time, hdr.creation_time, 8);
646 inf->creation_time[8] = 0;
648 #ifdef WORDS_BIGENDIAN
649 inf->bigendian = !ext->reverse_endian;
651 inf->bigendian = ext->reverse_endian;
654 inf->compressed = hdr.compressed;
656 inf->ncases = hdr.ncases;
658 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
659 if (isgraph ((unsigned char) *cp))
661 strcpy (inf->product, cp);
670 /* Reads most of the dictionary from file H; also fills in the
671 associated VAR_BY_INDEX array. The get.* elements in the
672 created dictionary are set to appropriate values to allow the
675 read_variables (struct file_handle * h, struct variable *** var_by_index)
679 struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */
680 struct dictionary *dict = ext->dict; /* Dictionary being constructed. */
681 struct sysfile_variable sv; /* Disk buffer. */
682 int long_string_count = 0; /* # of long string continuation
683 records still expected. */
684 int next_value = 0; /* Index to next `value' structure. */
686 /* Allocate variables. */
687 *var_by_index = xmalloc (sizeof **var_by_index * ext->case_size);
689 /* Read in the entry for each variable and use the info to
690 initialize the dictionary. */
691 for (i = 0; i < ext->case_size; i++)
697 assertive_bufread (h, &sv, sizeof sv, 0);
699 if (ext->reverse_endian)
701 bswap_int32 (&sv.rec_type);
702 bswap_int32 (&sv.type);
703 bswap_int32 (&sv.has_var_label);
704 bswap_int32 (&sv.n_missing_values);
705 bswap_int32 (&sv.print);
706 bswap_int32 (&sv.write);
709 if (sv.rec_type != 2)
710 lose ((ME, _("%s: position %d: Bad record type (%d); "
711 "the expected value was 2."),
712 handle_get_filename (h), i, sv.rec_type));
714 /* If there was a long string previously, make sure that the
715 continuations are present; otherwise make sure there aren't
717 if (long_string_count)
720 lose ((ME, _("%s: position %d: String variable does not have "
721 "proper number of continuation records."),
722 handle_get_filename (h), i));
724 (*var_by_index)[i] = NULL;
728 else if (sv.type == -1)
729 lose ((ME, _("%s: position %d: Superfluous long string continuation "
731 handle_get_filename (h), i));
733 /* Check fields for validity. */
734 if (sv.type < 0 || sv.type > 255)
735 lose ((ME, _("%s: position %d: Bad variable type code %d."),
736 handle_get_filename (h), i, sv.type));
737 if (sv.has_var_label != 0 && sv.has_var_label != 1)
738 lose ((ME, _("%s: position %d: Variable label indicator field is not "
739 "0 or 1."), handle_get_filename (h), i));
740 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
741 || sv.n_missing_values == -1)
742 lose ((ME, _("%s: position %d: Missing value indicator field is not "
743 "-3, -2, 0, 1, 2, or 3."), handle_get_filename (h), i));
745 /* Copy first character of variable name. */
746 if (!isalpha ((unsigned char) sv.name[0])
747 && sv.name[0] != '@' && sv.name[0] != '#')
748 lose ((ME, _("%s: position %d: Variable name begins with invalid "
750 handle_get_filename (h), i));
751 if (islower ((unsigned char) sv.name[0]))
752 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
754 handle_get_filename (h), i, sv.name[0]);
755 if (sv.name[0] == '#')
756 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
757 "(`#'). Scratch variables should not appear in system "
759 handle_get_filename (h), i);
760 name[0] = toupper ((unsigned char) (sv.name[0]));
762 /* Copy remaining characters of variable name. */
763 for (j = 1; j < 8; j++)
765 int c = (unsigned char) sv.name[j];
769 else if (islower (c))
771 msg (MW, _("%s: position %d: Variable name character %d is "
772 "lowercase letter %c."),
773 handle_get_filename (h), i, j + 1, sv.name[j]);
774 name[j] = toupper ((unsigned char) (c));
776 else if (isalnum (c) || c == '.' || c == '@'
777 || c == '#' || c == '$' || c == '_')
780 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
782 handle_get_filename (h), i, c, c));
786 /* Create variable. */
787 vv = (*var_by_index)[i] = dict_create_var (dict, name, sv.type);
789 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
790 handle_get_filename (h), name));
792 /* Case reading data. */
793 vv->get.fv = next_value;
797 vv->get.nv = DIV_RND_UP (sv.type, sizeof (flt64));
798 long_string_count = vv->get.nv - 1;
799 next_value += vv->get.nv;
801 /* Get variable label, if any. */
802 if (sv.has_var_label == 1)
807 /* Read length of label. */
808 assertive_bufread (h, &len, sizeof len, 0);
809 if (ext->reverse_endian)
813 if (len < 0 || len > 255)
814 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
816 handle_get_filename (h), vv->name, len));
818 /* Read label into variable structure. */
819 vv->label = bufread (h, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
820 if (vv->label == NULL)
822 vv->label[len] = '\0';
825 /* Set missing values. */
826 if (sv.n_missing_values != 0)
830 if (vv->width > MAX_SHORT_STRING)
831 lose ((ME, _("%s: Long string variable %s may not have missing "
833 handle_get_filename (h), vv->name));
835 assertive_bufread (h, mv, sizeof *mv * abs (sv.n_missing_values), 0);
837 if (ext->reverse_endian && vv->type == NUMERIC)
838 for (j = 0; j < abs (sv.n_missing_values); j++)
839 bswap_flt64 (&mv[j]);
841 if (sv.n_missing_values > 0)
843 vv->miss_type = sv.n_missing_values;
844 if (vv->type == NUMERIC)
845 for (j = 0; j < sv.n_missing_values; j++)
846 vv->missing[j].f = mv[j];
848 for (j = 0; j < sv.n_missing_values; j++)
849 memcpy (vv->missing[j].s, &mv[j], vv->width);
855 if (vv->type == ALPHA)
856 lose ((ME, _("%s: String variable %s may not have missing "
857 "values specified as a range."),
858 handle_get_filename (h), vv->name));
860 if (mv[0] == ext->lowest)
862 vv->miss_type = MISSING_LOW;
863 vv->missing[x++].f = mv[1];
865 else if (mv[1] == ext->highest)
867 vv->miss_type = MISSING_HIGH;
868 vv->missing[x++].f = mv[0];
872 vv->miss_type = MISSING_RANGE;
873 vv->missing[x++].f = mv[0];
874 vv->missing[x++].f = mv[1];
877 if (sv.n_missing_values == -3)
880 vv->missing[x++].f = mv[2];
885 vv->miss_type = MISSING_NONE;
887 if (!parse_format_spec (h, sv.print, &vv->print, vv)
888 || !parse_format_spec (h, sv.write, &vv->write, vv))
892 /* Some consistency checks. */
893 if (long_string_count != 0)
894 lose ((ME, _("%s: Long string continuation records omitted at end of "
896 handle_get_filename (h)));
897 if (next_value != ext->case_size)
898 lose ((ME, _("%s: System file header indicates %d variable positions but "
899 "%d were read from file."),
900 handle_get_filename (h), ext->case_size, next_value));
911 /* Translates the format spec from sysfile format to internal
914 parse_format_spec (struct file_handle *h, int32 s, struct fmt_spec *v, struct variable *vv)
916 v->type = translate_fmt ((s >> 16) & 0xff);
918 lose ((ME, _("%s: Bad format specifier byte (%d)."),
919 handle_get_filename (h), (s >> 16) & 0xff));
920 v->w = (s >> 8) & 0xff;
923 /* FIXME? Should verify the resulting specifier more thoroughly. */
926 lose ((ME, _("%s: Bad format specifier byte (%d)."),
927 handle_get_filename (h), (s >> 16) & 0xff));
928 if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
929 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
930 handle_get_filename (h),
931 vv->type == ALPHA ? _("String") : _("Numeric"),
933 formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
934 formats[v->type].name));
941 /* Reads value labels from sysfile H and inserts them into the
942 associated dictionary. */
944 read_value_labels (struct file_handle * h, struct variable ** var_by_index)
946 struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */
950 unsigned char raw_value[8]; /* Value as uninterpreted bytes. */
951 union value value; /* Value. */
952 char *label; /* Null-terminated label string. */
955 struct label *labels = NULL;
956 int32 n_labels; /* Number of labels. */
958 struct variable **var = NULL; /* Associated variables. */
959 int32 n_vars; /* Number of associated variables. */
963 /* First step: read the contents of the type 3 record and record its
964 contents. Note that we can't do much with the data since we
965 don't know yet whether it is of numeric or string type. */
967 /* Read number of labels. */
968 assertive_bufread (h, &n_labels, sizeof n_labels, 0);
969 if (ext->reverse_endian)
970 bswap_int32 (&n_labels);
972 /* Allocate memory. */
973 labels = xmalloc (n_labels * sizeof *labels);
974 for (i = 0; i < n_labels; i++)
975 labels[i].label = NULL;
977 /* Read each value/label tuple into labels[]. */
978 for (i = 0; i < n_labels; i++)
980 struct label *label = labels + i;
981 unsigned char label_len;
985 assertive_bufread (h, label->raw_value, sizeof label->raw_value, 0);
987 /* Read label length. */
988 assertive_bufread (h, &label_len, sizeof label_len, 0);
989 padded_len = ROUND_UP (label_len + 1, sizeof (flt64));
991 /* Read label, padding. */
992 label->label = xmalloc (padded_len + 1);
993 assertive_bufread (h, label->label, padded_len - 1, 0);
994 label->label[label_len] = 0;
997 /* Second step: Read the type 4 record that has the list of
998 variables to which the value labels are to be applied. */
1000 /* Read record type of type 4 record. */
1004 assertive_bufread (h, &rec_type, sizeof rec_type, 0);
1005 if (ext->reverse_endian)
1006 bswap_int32 (&rec_type);
1009 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1010 "follow value label record (type 3) as it should."),
1011 handle_get_filename (h)));
1014 /* Read number of variables associated with value label from type 4
1016 assertive_bufread (h, &n_vars, sizeof n_vars, 0);
1017 if (ext->reverse_endian)
1018 bswap_int32 (&n_vars);
1019 if (n_vars < 1 || n_vars > dict_get_var_cnt (ext->dict))
1020 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1021 "is not between 1 and the number of variables (%d)."),
1022 handle_get_filename (h), n_vars, dict_get_var_cnt (ext->dict)));
1024 /* Read the list of variables. */
1025 var = xmalloc (n_vars * sizeof *var);
1026 for (i = 0; i < n_vars; i++)
1031 /* Read variable index, check range. */
1032 assertive_bufread (h, &var_index, sizeof var_index, 0);
1033 if (ext->reverse_endian)
1034 bswap_int32 (&var_index);
1035 if (var_index < 1 || var_index > ext->case_size)
1036 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1037 "not between 1 and the number of values (%d)."),
1038 handle_get_filename (h), var_index, ext->case_size));
1040 /* Make sure it's a real variable. */
1041 v = var_by_index[var_index - 1];
1043 lose ((ME, _("%s: Variable index associated with value label (%d) "
1044 "refers to a continuation of a string variable, not to "
1045 "an actual variable."),
1046 handle_get_filename (h), var_index));
1047 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1048 lose ((ME, _("%s: Value labels are not allowed on long string "
1050 handle_get_filename (h), v->name));
1052 /* Add it to the list of variables. */
1056 /* Type check the variables. */
1057 for (i = 1; i < n_vars; i++)
1058 if (var[i]->type != var[0]->type)
1059 lose ((ME, _("%s: Variables associated with value label are not all of "
1060 "identical type. Variable %s has %s type, but variable "
1062 handle_get_filename (h),
1063 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1064 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1066 /* Fill in labels[].value, now that we know the desired type. */
1067 for (i = 0; i < n_labels; i++)
1069 struct label *label = labels + i;
1071 if (var[0]->type == ALPHA)
1073 const int copy_len = min (sizeof (label->raw_value),
1074 sizeof (label->label));
1075 memcpy (label->value.s, label->raw_value, copy_len);
1078 assert (sizeof f == sizeof label->raw_value);
1079 memcpy (&f, label->raw_value, sizeof f);
1080 if (ext->reverse_endian)
1086 /* Assign the value_label's to each variable. */
1087 for (i = 0; i < n_vars; i++)
1089 struct variable *v = var[i];
1092 /* Add each label to the variable. */
1093 for (j = 0; j < n_labels; j++)
1095 struct label *label = labels + j;
1096 if (!val_labs_replace (v->val_labs, label->value, label->label))
1099 if (var[0]->type == NUMERIC)
1100 msg (MW, _("%s: File contains duplicate label for value %g for "
1102 handle_get_filename (h), label->value.f, v->name);
1104 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1105 "for variable %s."),
1106 handle_get_filename (h), v->width, label->value.s, v->name);
1110 for (i = 0; i < n_labels; i++)
1111 free (labels[i].label);
1119 for (i = 0; i < n_labels; i++)
1120 free (labels[i].label);
1127 /* Reads NBYTES bytes from the file represented by H. If BUF is
1128 non-NULL, uses that as the buffer; otherwise allocates at least
1129 MINALLOC bytes. Returns a pointer to the buffer on success, NULL
1132 bufread (struct file_handle * h, void *buf, size_t nbytes, size_t minalloc)
1134 struct sfm_fhuser_ext *ext = h->ext;
1137 buf = xmalloc (max (nbytes, minalloc));
1138 if (1 != fread (buf, nbytes, 1, ext->file))
1140 if (ferror (ext->file))
1141 msg (ME, _("%s: Reading system file: %s."),
1142 handle_get_filename (h), strerror (errno));
1144 corrupt_msg (ME, _("%s: Unexpected end of file."),
1145 handle_get_filename (h));
1151 /* Reads a document record, type 6, from system file H, and sets up
1152 the documents and n_documents fields in the associated
1155 read_documents (struct file_handle * h)
1157 struct sfm_fhuser_ext *ext = h->ext;
1158 struct dictionary *dict = ext->dict;
1162 if (dict_get_documents (dict) != NULL)
1163 lose ((ME, _("%s: System file contains multiple "
1164 "type 6 (document) records."),
1165 handle_get_filename (h)));
1167 assertive_bufread (h, &n_lines, sizeof n_lines, 0);
1169 lose ((ME, _("%s: Number of document lines (%ld) "
1170 "must be greater than 0."),
1171 handle_get_filename (h), (long) n_lines));
1173 documents = bufread (h, NULL, 80 * n_lines, n_lines * 80 + 1);
1174 /* FIXME? Run through asciify. */
1175 if (documents == NULL)
1177 documents[80 * n_lines] = '\0';
1178 dict_set_documents (dict, documents);
1186 #if GLOBAL_DEBUGGING
1187 #include "debug-print.h"
1188 /* Displays dictionary DICT on stdout. */
1190 dump_dictionary (struct dictionary * dict)
1194 debug_printf ((_("dictionary:\n")));
1195 for (i = 0; i < dict->nvar; i++)
1198 struct variable *v = dict->var[i];
1201 debug_printf ((" var %s", v->name));
1202 debug_printf (("(type:%s,%d)", (v->type == NUMERIC ? _("num")
1203 : (v->type == ALPHA ? _("str") : "!!!")),
1205 debug_printf (("(fv:%d,%d)", v->fv, v->nv));
1206 debug_printf (("(left:%s)(miss:", v->left ? _("left") : _("right")));
1208 switch (v->miss_type)
1212 debug_printf ((_("none")));
1216 debug_printf ((_("one")));
1220 debug_printf ((_("two")));
1224 debug_printf ((_("three")));
1228 debug_printf ((_("range")));
1232 debug_printf ((_("low")));
1236 debug_printf ((_("high")));
1238 case MISSING_RANGE_1:
1240 debug_printf ((_("range+1")));
1244 debug_printf ((_("low+1")));
1246 case MISSING_HIGH_1:
1248 debug_printf ((_("high+1")));
1253 for (j = 0; j < n; j++)
1254 if (v->type == NUMERIC)
1255 debug_printf ((",%g", v->missing[j].f));
1257 debug_printf ((",\"%.*s\"", v->width, v->missing[j].s));
1258 strcpy (print, fmt_to_string (&v->print));
1259 debug_printf ((")(fmt:%s,%s)(lbl:%s)\n",
1260 print, fmt_to_string (&v->write),
1261 v->label ? v->label : "nolabel"));
1268 /* Reads compressed data into H->BUF and sets other pointers
1269 appropriately. Returns nonzero only if both no errors occur and
1272 buffer_input (struct file_handle * h)
1274 struct sfm_fhuser_ext *ext = h->ext;
1277 if (ext->buf == NULL)
1278 ext->buf = xmalloc (sizeof *ext->buf * 128);
1279 amt = fread (ext->buf, sizeof *ext->buf, 128, ext->file);
1280 if (ferror (ext->file))
1282 msg (ME, _("%s: Error reading file: %s."),
1283 handle_get_filename (h), strerror (errno));
1286 ext->ptr = ext->buf;
1287 ext->end = &ext->buf[amt];
1291 /* Reads a single case consisting of compressed data from system file
1292 H into the array TEMP[] according to dictionary DICT, and returns
1293 nonzero only if successful. */
1294 /* Data in system files is compressed in the following manner:
1295 data values are grouped into sets of eight; each of the eight has
1296 one instruction byte, which are output together in an octet; each
1297 byte gives a value for that byte or indicates that the value can be
1298 found following the instructions. */
1300 read_compressed_data (struct file_handle * h, flt64 * temp)
1302 struct sfm_fhuser_ext *ext = h->ext;
1304 const unsigned char *p_end = ext->x + sizeof (flt64);
1305 unsigned char *p = ext->y;
1307 const flt64 *temp_beg = temp;
1308 const flt64 *temp_end = &temp[ext->case_size];
1312 for (; p < p_end; p++)
1316 /* Code 0 is ignored. */
1319 /* Code 252 is end of file. */
1320 if (temp_beg != temp)
1321 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1322 "in partial case."),
1323 handle_get_filename (h)));
1326 /* Code 253 indicates that the value is stored explicitly
1327 following the instruction bytes. */
1328 if (ext->ptr == NULL || ext->ptr >= ext->end)
1329 if (!buffer_input (h))
1331 lose ((ME, _("%s: Unexpected end of file."),
1332 handle_get_filename (h)));
1335 memcpy (temp++, ext->ptr++, sizeof *temp);
1336 if (temp >= temp_end)
1340 /* Code 254 indicates a string that is all blanks. */
1341 memset (temp++, ' ', sizeof *temp);
1342 if (temp >= temp_end)
1346 /* Code 255 indicates the system-missing value. */
1347 *temp = ext->sysmis;
1348 if (ext->reverse_endian)
1351 if (temp >= temp_end)
1355 /* Codes 1 through 251 inclusive are taken to indicate a
1356 value of (BYTE - BIAS), where BYTE is the byte's value
1357 and BIAS is the compression bias (generally 100.0). */
1358 *temp = *p - ext->bias;
1359 if (ext->reverse_endian)
1362 if (temp >= temp_end)
1367 /* We have reached the end of this instruction octet. Read
1369 if (ext->ptr == NULL || ext->ptr >= ext->end)
1370 if (!buffer_input (h))
1372 if (temp_beg != temp)
1373 lose ((ME, _("%s: Unexpected end of file."),
1374 handle_get_filename (h)));
1377 memcpy (ext->x, ext->ptr++, sizeof *temp);
1385 /* We have filled up an entire record. Update state and return
1391 /* We have been unsuccessful at filling a record, either through i/o
1392 error or through an end-of-file indication. Update state and
1393 return unsuccessfully. */
1397 /* Reads one case from system file H into the value array PERM
1398 according to the instructions given in associated dictionary DICT,
1399 which must have the get.* elements appropriately set. Returns
1400 nonzero only if successful. */
1402 sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * dict)
1404 struct sfm_fhuser_ext *ext = h->ext;
1411 /* The first concern is to obtain a full case relative to the data
1412 file. (Cases in the data file have no particular relationship to
1413 cases in the active file.) */
1414 nbytes = sizeof *temp * ext->case_size;
1415 temp = local_alloc (nbytes);
1417 if (ext->compressed == 0)
1419 size_t amt = fread (temp, 1, nbytes, ext->file);
1423 if (ferror (ext->file))
1424 msg (ME, _("%s: Reading system file: %s."),
1425 handle_get_filename (h), strerror (errno));
1427 msg (ME, _("%s: Partial record at end of system file."),
1428 handle_get_filename (h));
1432 else if (!read_compressed_data (h, temp))
1435 /* Translate a case in data file format to a case in active file
1437 for (i = 0; i < dict_get_var_cnt (dict); i++)
1439 struct variable *v = dict_get_var (dict, i);
1441 if (v->get.fv == -1)
1444 if (v->type == NUMERIC)
1446 flt64 src = temp[v->get.fv];
1447 if (ext->reverse_endian)
1449 perm[v->fv].f = src == ext->sysmis ? SYSMIS : src;
1452 memcpy (&perm[v->fv].s, &temp[v->get.fv], v->width);
1463 static struct fh_ext_class sfm_r_class =
1466 N_("reading as a system file"),