1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
46 #include "file-handle.h"
58 /*#define DEBUGGING 1*/
59 #include "debug-print.h"
61 /* PORTME: This file may require substantial revision for those
62 systems that don't meet the typical 32-bit integer/64-bit double
63 model. It's kinda hard to tell without having one of them on my
66 /* sfm's file_handle extension. */
69 FILE *file; /* Actual file. */
70 int opened; /* Reference count. */
72 struct dictionary *dict; /* File's dictionary. */
74 int reverse_endian; /* 1=file has endianness opposite us. */
75 int case_size; /* Number of `values's per case. */
76 long ncases; /* Number of cases, -1 if unknown. */
77 int compressed; /* 1=compressed, 0=not compressed. */
78 double bias; /* Compression bias, usually 100.0. */
79 int weight_index; /* 0-based index of weighting variable, or -1. */
81 /* File's special constants. */
86 /* Uncompression buffer. */
87 flt64 *buf; /* Buffer data. */
88 flt64 *ptr; /* Current location in buffer. */
89 flt64 *end; /* End of buffer data. */
91 /* Compression instruction octet. */
92 unsigned char x[sizeof (flt64)];
93 /* Current instruction octet. */
94 unsigned char *y; /* Location in current instruction octet. */
97 static struct fh_ext_class sfm_r_class;
100 void dump_dictionary (struct dictionary * dict);
105 /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */
107 #include <asm/byteorder.h>
108 #include <netinet/in.h>
110 bswap_int32 (int32 * x)
114 #else /* not Linux */
116 bswap_int32 (int32 * x)
118 unsigned char *y = (char *) x;
127 #endif /* not Linux */
129 /* Reverse the byte order of 64-bit floating point *X. */
131 bswap_flt64 (flt64 * x)
133 /* Note that under compilers of any quality, half of this function
134 should optimize out as dead code. */
135 unsigned char *y = (char *) x;
137 if (sizeof (flt64) == 8)
158 for (x = 0; x < sizeof (flt64) / 2; x++)
161 y[x] = y[sizeof (flt64) - x];
162 y[sizeof (flt64) - x] = t;
168 corrupt_msg (int class, const char *format,...)
169 __attribute__ ((format (printf, 2, 3)));
171 /* Displays a corrupt sysfile error. */
173 corrupt_msg (int class, const char *format,...)
180 va_start (args, format);
181 vsnprintf (buf, 1024, format, args);
189 getl_location (&e.where.filename, &e.where.line_number);
190 e.title = _("corrupt system file: ");
197 /* Closes a system file after we're done with it. */
199 sfm_close (struct file_handle * h)
201 struct sfm_fhuser_ext *ext = h->ext;
204 assert (ext->opened == 0);
205 if (EOF == fn_close (h->fn, ext->file))
206 msg (ME, _("%s: Closing system file: %s."), h->fn, strerror (errno));
211 /* Closes a system file if we're done with it. */
213 sfm_maybe_close (struct file_handle *h)
215 struct sfm_fhuser_ext *ext = h->ext;
217 if (ext->opened == 1)
223 /* Dictionary reader. */
225 static void *bufread (struct file_handle * handle, void *buf, size_t nbytes,
228 static int read_header (struct file_handle * h, struct sfm_read_info * inf);
229 static int parse_format_spec (struct file_handle * h, int32 s,
230 struct fmt_spec * v, struct variable *vv);
231 static int read_value_labels (struct file_handle * h, struct variable ** var_by_index);
232 static int read_variables (struct file_handle * h, struct variable *** var_by_index);
233 static int read_machine_int32_info (struct file_handle * h, int size, int count);
234 static int read_machine_flt64_info (struct file_handle * h, int size, int count);
235 static int read_documents (struct file_handle * h);
237 /* Displays the message X with corrupt_msg, then jumps to the lossage
247 /* Calls bufread with the specified arguments, and jumps to lossage if
249 #define assertive_bufread(a,b,c,d) \
252 if (!bufread (a,b,c,d)) \
257 /* Reads the dictionary from file with handle H, and returns it in a
258 dictionary structure. This dictionary may be modified in order to
259 rename, reorder, and delete variables, etc. */
261 sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf)
263 /* The file handle extension record. */
264 struct sfm_fhuser_ext *ext;
266 /* Allows for quick reference to variables according to indexes
267 relative to position within a case. */
268 struct variable **var_by_index = NULL;
270 /* Check whether the file is already open. */
271 if (h->class == &sfm_r_class)
277 else if (h->class != NULL)
279 msg (ME, _("Cannot read file %s as system file: already opened for %s."),
280 fh_handle_name (h), h->class->name);
284 msg (VM (1), _("%s: Opening system-file handle %s for reading."),
285 fh_handle_filename (h), fh_handle_name (h));
287 /* Open the physical disk file. */
288 ext = xmalloc (sizeof (struct sfm_fhuser_ext));
289 ext->file = fn_open (h->norm_fn, "rb");
290 if (ext->file == NULL)
292 msg (ME, _("An error occurred while opening \"%s\" for reading "
293 "as a system file: %s."), h->fn, strerror (errno));
299 /* Initialize the sfm_fhuser_ext structure. */
300 h->class = &sfm_r_class;
303 ext->buf = ext->ptr = ext->end = NULL;
304 ext->y = ext->x + sizeof ext->x;
307 /* Default special constants. */
308 ext->sysmis = -FLT64_MAX;
309 ext->highest = FLT64_MAX;
310 ext->lowest = second_lowest_flt64;
312 /* Read the header. */
313 if (!read_header (h, inf))
316 /* Read about the variables. */
317 if (!read_variables (h, &var_by_index))
320 /* Handle weighting. */
321 if (ext->weight_index != -1)
323 struct variable *wv = var_by_index[ext->weight_index];
326 lose ((ME, _("%s: Weighting variable may not be a continuation of "
327 "a long string variable."), h->fn));
328 else if (wv->type == ALPHA)
329 lose ((ME, _("%s: Weighting variable may not be a string variable."),
332 strcpy (ext->dict->weight_var, wv->name);
335 ext->dict->weight_var[0] = 0;
337 /* Read records of types 3, 4, 6, and 7. */
342 assertive_bufread (h, &rec_type, sizeof rec_type, 0);
343 if (ext->reverse_endian)
344 bswap_int32 (&rec_type);
349 if (!read_value_labels (h, var_by_index))
354 lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 "
355 "records must always immediately follow type 3 records."),
359 if (!read_documents (h))
375 assertive_bufread (h, &data, sizeof data, 0);
376 if (ext->reverse_endian)
378 bswap_int32 (&data.subtype);
379 bswap_int32 (&data.size);
380 bswap_int32 (&data.count);
383 /*if(data.size != sizeof(int32) && data.size != sizeof(flt64))
384 lose((ME, "%s: Element size in record type 7, subtype %d, is "
385 "not either the size of IN (%d) or OBS (%d); actual value "
387 h->fn, data.subtype, sizeof(int32), sizeof(flt64),
390 switch (data.subtype)
393 if (!read_machine_int32_info (h, data.size, data.count))
398 if (!read_machine_flt64_info (h, data.size, data.count))
404 case 11: /* ?? Used by SPSS 8.0. */
409 msg (MW, _("%s: Unrecognized record type 7, subtype %d "
410 "encountered in system file."), h->fn, data.subtype);
416 void *x = bufread (h, NULL, data.size * data.count, 0);
428 assertive_bufread (h, &filler, sizeof filler, 0);
429 goto break_out_of_loop;
433 lose ((ME, _("%s: Unrecognized record type %d."), h->fn, rec_type));
438 /* Come here on successful completion. */
439 msg (VM (2), _("Read system-file dictionary successfully."));
442 dump_dictionary (ext->dict);
448 /* Come here on unsuccessful completion. */
449 msg (VM (1), _("Error reading system-file header."));
452 fn_close (h->fn, ext->file);
453 if (ext && ext->dict)
454 free_dictionary (ext->dict);
461 /* Read record type 7, subtype 3. */
463 read_machine_int32_info (struct file_handle * h, int size, int count)
465 struct sfm_fhuser_ext *ext = h->ext;
472 if (size != sizeof (int32) || count != 8)
473 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
474 "subtype 3. Expected size %d, count 8."),
475 h->fn, size, count, sizeof (int32)));
477 assertive_bufread (h, data, sizeof data, 0);
478 if (ext->reverse_endian)
479 for (i = 0; i < 8; i++)
480 bswap_int32 (&data[i]);
482 /* PORTME: Check floating-point representation. */
485 lose ((ME, _("%s: Floating-point representation in system file is not "
486 "IEEE-754. PSPP cannot convert between floating-point "
487 "formats."), h->fn));
490 /* PORTME: Check recorded file endianness against intuited file
492 #ifdef WORDS_BIGENDIAN
497 if (ext->reverse_endian)
499 if (file_bigendian ^ (data[6] == 1))
500 lose ((ME, _("%s: File-indicated endianness (%s) does not match endianness "
501 "intuited from file header (%s)."),
502 h->fn, file_bigendian ? _("big-endian") : _("little-endian"),
503 data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian")
506 /* PORTME: Character representation code. */
507 if (data[7] != 2 && data[7] != 3)
508 lose ((ME, _("%s: File-indicated character representation code (%s) is not "
510 data[7] == 1 ? "EBCDIC" : (data[7] == 4 ? _("DEC Kanji") : _("Unknown"))));
518 /* Read record type 7, subtype 4. */
520 read_machine_flt64_info (struct file_handle * h, int size, int count)
522 struct sfm_fhuser_ext *ext = h->ext;
528 if (size != sizeof (flt64) || count != 3)
529 lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, "
530 "subtype 4. Expected size %d, count 8."),
531 h->fn, size, count, sizeof (flt64)));
533 assertive_bufread (h, data, sizeof data, 0);
534 if (ext->reverse_endian)
535 for (i = 0; i < 3; i++)
536 bswap_flt64 (&data[i]);
538 if (data[0] != SYSMIS || data[1] != FLT64_MAX
539 || data[2] != second_lowest_flt64)
541 ext->sysmis = data[0];
542 ext->highest = data[1];
543 ext->lowest = data[2];
544 msg (MW, _("%s: File-indicated value is different from internal value "
545 "for at least one of the three system values. SYSMIS: "
546 "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: "
548 h->fn, (double) data[0], (double) SYSMIS,
549 (double) data[1], (double) FLT64_MAX,
550 (double) data[2], (double) second_lowest_flt64);
560 read_header (struct file_handle * h, struct sfm_read_info * inf)
562 struct sfm_fhuser_ext *ext = h->ext; /* File extension strcut. */
563 struct sysfile_header hdr; /* Disk buffer. */
564 struct dictionary *dict; /* File dictionary. */
565 char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
566 int skip_amt; /* Amount of product name to omit. */
569 /* Create the dictionary. */
570 dict = ext->dict = xmalloc (sizeof *dict);
572 dict->var_by_name = NULL;
575 dict->nval = -1; /* Unknown. */
578 dict->weight_var[0] = 0;
579 dict->weight_index = -1;
580 dict->filter_var[0] = 0;
582 dict->n_documents = 0;
583 dict->documents = NULL;
585 /* Read header, check magic. */
586 assertive_bufread (h, &hdr, sizeof hdr, 0);
587 if (0 != strncmp ("$FL2", hdr.rec_type, 4))
588 lose ((ME, _("%s: Bad magic. Proper system files begin with "
589 "the four characters `$FL2'. This file will not be read."),
592 /* Check eye-catcher string. */
593 memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name);
594 for (i = 0; i < 60; i++)
595 if (!isprint ((unsigned char) prod_name[i]))
597 for (i = 59; i >= 0; i--)
598 if (!isgraph ((unsigned char) prod_name[i]))
603 prod_name[60] = '\0';
607 static const char *prefix[N_PREFIXES] =
609 "@(#) SPSS DATA FILE",
615 for (i = 0; i < N_PREFIXES; i++)
616 if (!strncmp (prefix[i], hdr.prod_name, strlen (prefix[i])))
618 skip_amt = strlen (prefix[i]);
623 /* Check endianness. */
624 /* PORTME: endianness. */
625 if (hdr.layout_code == 2)
626 ext->reverse_endian = 0;
629 bswap_int32 (&hdr.layout_code);
630 if (hdr.layout_code != 2)
631 lose ((ME, _("%s: File layout code has unexpected value %d. Value "
632 "should be 2, in big-endian or little-endian format."),
633 h->fn, hdr.layout_code));
635 ext->reverse_endian = 1;
636 bswap_int32 (&hdr.case_size);
637 bswap_int32 (&hdr.compressed);
638 bswap_int32 (&hdr.weight_index);
639 bswap_int32 (&hdr.ncases);
640 bswap_flt64 (&hdr.bias);
643 /* Copy basic info and verify correctness. */
644 ext->case_size = hdr.case_size;
645 if (hdr.case_size <= 0 || ext->case_size > (INT_MAX
646 / (int) sizeof (union value) / 2))
647 lose ((ME, _("%s: Number of elements per case (%d) is not between 1 "
648 "and %d."), h->fn, hdr.case_size, INT_MAX / sizeof (union value) / 2));
650 ext->compressed = hdr.compressed;
652 ext->weight_index = hdr.weight_index - 1;
653 if (hdr.weight_index < 0 || hdr.weight_index > hdr.case_size)
654 lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 "
655 "and number of elements per case (%d)."),
656 h->fn, hdr.weight_index, ext->case_size));
658 ext->ncases = hdr.ncases;
659 if (ext->ncases < -1 || ext->ncases > INT_MAX / 2)
660 lose ((ME, _("%s: Number of cases in file (%ld) is not between -1 and "
661 "%d."), h->fn, (long) ext->ncases, INT_MAX / 2));
663 ext->bias = hdr.bias;
664 if (ext->bias != 100.0)
665 corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual "
666 "value of 100."), h->fn, ext->bias);
668 /* Make a file label only on the condition that the given label is
669 not all spaces or nulls. */
674 for (i = sizeof hdr.file_label - 1; i >= 0; i--)
675 if (!isspace ((unsigned char) hdr.file_label[i])
676 && hdr.file_label[i] != 0)
678 dict->label = xmalloc (i + 2);
679 memcpy (dict->label, hdr.file_label, i + 1);
680 dict->label[i + 1] = 0;
689 memcpy (inf->creation_date, hdr.creation_date, 9);
690 inf->creation_date[9] = 0;
692 memcpy (inf->creation_time, hdr.creation_time, 8);
693 inf->creation_time[8] = 0;
695 #ifdef WORDS_BIGENDIAN
696 inf->bigendian = !ext->reverse_endian;
698 inf->bigendian = ext->reverse_endian;
701 inf->compressed = hdr.compressed;
703 inf->ncases = hdr.ncases;
705 for (cp = &prod_name[skip_amt]; cp < &prod_name[60]; cp++)
706 if (isgraph ((unsigned char) *cp))
708 strcpy (inf->product, cp);
717 /* Reads most of the dictionary from file H; also fills in the
718 associated VAR_BY_INDEX array.
720 Note: the dictionary returned by this function has an invalid NVAL
721 element, also the VAR[] array does not have the FV and LV elements
722 set, however the NV elements *are* set. This is because the caller
723 will probably modify the dictionary before reading it in from the
724 file. Also, the get.* elements are set to appropriate values to
725 allow the file to be read. */
727 read_variables (struct file_handle * h, struct variable *** var_by_index)
731 struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */
732 struct dictionary *dict = ext->dict; /* Dictionary being constructed. */
733 struct sysfile_variable sv; /* Disk buffer. */
734 int long_string_count = 0; /* # of long string continuation
735 records still expected. */
736 int next_value = 0; /* Index to next `value' structure. */
738 /* Allocate variables. */
739 dict->var = xmalloc (sizeof *dict->var * ext->case_size);
740 *var_by_index = xmalloc (sizeof **var_by_index * ext->case_size);
742 /* Read in the entry for each variable and use the info to
743 initialize the dictionary. */
744 for (i = 0; i < ext->case_size; i++)
749 assertive_bufread (h, &sv, sizeof sv, 0);
751 if (ext->reverse_endian)
753 bswap_int32 (&sv.rec_type);
754 bswap_int32 (&sv.type);
755 bswap_int32 (&sv.has_var_label);
756 bswap_int32 (&sv.n_missing_values);
757 bswap_int32 (&sv.print);
758 bswap_int32 (&sv.write);
761 if (sv.rec_type != 2)
762 lose ((ME, _("%s: position %d: Bad record type (%d); "
763 "the expected value was 2."), h->fn, i, sv.rec_type));
765 /* If there was a long string previously, make sure that the
766 continuations are present; otherwise make sure there aren't
768 if (long_string_count)
771 lose ((ME, _("%s: position %d: String variable does not have "
772 "proper number of continuation records."), h->fn, i));
774 (*var_by_index)[i] = NULL;
778 else if (sv.type == -1)
779 lose ((ME, _("%s: position %d: Superfluous long string continuation "
780 "record."), h->fn, i));
782 /* Check fields for validity. */
783 if (sv.type < 0 || sv.type > 255)
784 lose ((ME, _("%s: position %d: Bad variable type code %d."),
786 if (sv.has_var_label != 0 && sv.has_var_label != 1)
787 lose ((ME, _("%s: position %d: Variable label indicator field is not "
788 "0 or 1."), h->fn, i));
789 if (sv.n_missing_values < -3 || sv.n_missing_values > 3
790 || sv.n_missing_values == -1)
791 lose ((ME, _("%s: position %d: Missing value indicator field is not "
792 "-3, -2, 0, 1, 2, or 3."), h->fn, i));
794 /* Construct internal variable structure, initialize critical bits. */
795 vv = (*var_by_index)[i] = dict->var[dict->nvar++] = xmalloc (sizeof *vv);
796 vv->index = dict->nvar - 1;
801 /* Copy first character of variable name. */
802 if (!isalpha ((unsigned char) sv.name[0])
803 && sv.name[0] != '@' && sv.name[0] != '#')
804 lose ((ME, _("%s: position %d: Variable name begins with invalid "
805 "character."), h->fn, i));
806 if (islower ((unsigned char) sv.name[0]))
807 msg (MW, _("%s: position %d: Variable name begins with lowercase letter "
808 "%c."), h->fn, i, sv.name[0]);
809 if (sv.name[0] == '#')
810 msg (MW, _("%s: position %d: Variable name begins with octothorpe "
811 "(`#'). Scratch variables should not appear in system "
812 "files."), h->fn, i);
813 vv->name[0] = toupper ((unsigned char) (sv.name[0]));
815 /* Copy remaining characters of variable name. */
816 for (j = 1; j < 8; j++)
818 int c = (unsigned char) sv.name[j];
822 else if (islower (c))
824 msg (MW, _("%s: position %d: Variable name character %d is "
825 "lowercase letter %c."), h->fn, i, j + 1, sv.name[j]);
826 vv->name[j] = toupper ((unsigned char) (c));
828 else if (isalnum (c) || c == '.' || c == '@'
829 || c == '#' || c == '$' || c == '_')
832 lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a "
833 "variable name."), h->fn, i, c, c));
837 /* Set type, width, and `left' fields and allocate `value'
844 vv->get.fv = next_value++;
851 vv->nv = DIV_RND_UP (vv->width, MAX_SHORT_STRING);
852 vv->get.nv = DIV_RND_UP (vv->width, sizeof (flt64));
853 vv->get.fv = next_value;
854 next_value += vv->get.nv;
855 long_string_count = vv->get.nv - 1;
857 vv->left = (vv->name[0] == '#');
859 /* Get variable label, if any. */
860 if (sv.has_var_label == 1)
865 /* Read length of label. */
866 assertive_bufread (h, &len, sizeof len, 0);
867 if (ext->reverse_endian)
871 if (len < 0 || len > 255)
872 lose ((ME, _("%s: Variable %s indicates variable label of invalid "
873 "length %d."), h->fn, vv->name, len));
875 /* Read label into variable structure. */
876 vv->label = bufread (h, NULL, ROUND_UP (len, sizeof (int32)), len + 1);
877 if (vv->label == NULL)
879 vv->label[len] = '\0';
882 /* Set missing values. */
883 if (sv.n_missing_values != 0)
887 if (vv->width > MAX_SHORT_STRING)
888 lose ((ME, _("%s: Long string variable %s may not have missing "
889 "values."), h->fn, vv->name));
891 assertive_bufread (h, mv, sizeof *mv * abs (sv.n_missing_values), 0);
893 if (ext->reverse_endian && vv->type == NUMERIC)
894 for (j = 0; j < abs (sv.n_missing_values); j++)
895 bswap_flt64 (&mv[j]);
897 if (sv.n_missing_values > 0)
899 vv->miss_type = sv.n_missing_values;
900 if (vv->type == NUMERIC)
901 for (j = 0; j < sv.n_missing_values; j++)
902 vv->missing[j].f = mv[j];
904 for (j = 0; j < sv.n_missing_values; j++)
905 memcpy (vv->missing[j].s, &mv[j], vv->width);
911 if (vv->type == ALPHA)
912 lose ((ME, _("%s: String variable %s may not have missing "
913 "values specified as a range."), h->fn, vv->name));
915 if (mv[0] == ext->lowest)
917 vv->miss_type = MISSING_LOW;
918 vv->missing[x++].f = mv[1];
920 else if (mv[1] == ext->highest)
922 vv->miss_type = MISSING_HIGH;
923 vv->missing[x++].f = mv[0];
927 vv->miss_type = MISSING_RANGE;
928 vv->missing[x++].f = mv[0];
929 vv->missing[x++].f = mv[1];
932 if (sv.n_missing_values == -3)
935 vv->missing[x++].f = mv[2];
940 vv->miss_type = MISSING_NONE;
942 if (!parse_format_spec (h, sv.print, &vv->print, vv)
943 || !parse_format_spec (h, sv.write, &vv->write, vv))
947 /* Some consistency checks. */
948 if (long_string_count != 0)
949 lose ((ME, _("%s: Long string continuation records omitted at end of "
950 "dictionary."), h->fn));
951 if (next_value != ext->case_size)
952 lose ((ME, _("%s: System file header indicates %d variable positions but "
953 "%d were read from file."), h->fn, ext->case_size, next_value));
954 dict->var = xrealloc (dict->var, sizeof *dict->var * dict->nvar);
956 /* Construct AVL tree of dictionary in order to speed up later
957 processing and to check for duplicate varnames. */
958 dict->var_by_name = avl_create (NULL, cmp_variable, NULL);
959 for (i = 0; i < dict->nvar; i++)
960 if (NULL != avl_insert (dict->var_by_name, dict->var[i]))
961 lose ((ME, _("%s: Duplicate variable name `%s' within system file."),
962 h->fn, dict->var[i]->name));
967 for (i = 0; i < dict->nvar; i++)
969 free (dict->var[i]->label);
973 if (dict->var_by_name)
974 avl_destroy (dict->var_by_name, NULL);
981 /* Translates the format spec from sysfile format to internal
984 parse_format_spec (struct file_handle *h, int32 s, struct fmt_spec *v, struct variable *vv)
986 if ((size_t) ((s >> 16) & 0xff)
987 >= sizeof translate_fmt / sizeof *translate_fmt)
988 lose ((ME, _("%s: Bad format specifier byte (%d)."),
989 h->fn, (s >> 16) & 0xff));
991 v->type = translate_fmt[(s >> 16) & 0xff];
992 v->w = (s >> 8) & 0xff;
995 /* FIXME? Should verify the resulting specifier more thoroughly. */
998 lose ((ME, _("%s: Bad format specifier byte (%d)."),
999 h->fn, (s >> 16) & 0xff));
1000 if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0))
1001 lose ((ME, _("%s: %s variable %s has %s format specifier %s."),
1002 h->fn, vv->type == ALPHA ? _("String") : _("Numeric"),
1004 formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"),
1005 formats[v->type].name));
1012 /* Reads value labels from sysfile H and inserts them into the
1013 associated dictionary. */
1015 read_value_labels (struct file_handle * h, struct variable ** var_by_index)
1017 struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */
1019 flt64 *raw_label = NULL; /* Array of raw label values. */
1020 struct value_label **cooked_label = NULL; /* Array of cooked labels. */
1021 int32 n_labels; /* Number of labels. */
1023 struct variable **var = NULL; /* Associated variables. */
1024 int32 n_vars; /* Number of associated variables. */
1028 /* First step: read the contents of the type 3 record and record its
1029 contents. Note that we can't do much with the data since we
1030 don't know yet whether it is of numeric or string type. */
1032 /* Read number of labels. */
1033 assertive_bufread (h, &n_labels, sizeof n_labels, 0);
1034 if (ext->reverse_endian)
1035 bswap_int32 (&n_labels);
1037 /* Allocate memory. */
1038 raw_label = xmalloc (sizeof *raw_label * n_labels);
1039 cooked_label = xmalloc (sizeof *cooked_label * n_labels);
1040 for (i = 0; i < n_labels; i++)
1041 cooked_label[i] = NULL;
1043 /* Read each value/label tuple. */
1044 for (i = 0; i < n_labels; i++)
1047 unsigned char label_len;
1051 /* Read value, label length. */
1052 assertive_bufread (h, &value, sizeof value, 0);
1053 assertive_bufread (h, &label_len, 1, 0);
1054 memcpy (&raw_label[i], &value, sizeof value);
1057 cooked_label[i] = xmalloc (sizeof **cooked_label);
1058 cooked_label[i]->s = xmalloc (label_len + 1);
1059 assertive_bufread (h, cooked_label[i]->s, label_len, 0);
1060 cooked_label[i]->s[label_len] = 0;
1063 rem = REM_RND_UP (label_len + 1, sizeof (flt64));
1065 assertive_bufread (h, &value, rem, 0);
1068 /* Second step: Read the type 4 record that has the list of
1069 variables to which the value labels are to be applied. */
1071 /* Read record type of type 4 record. */
1075 assertive_bufread (h, &rec_type, sizeof rec_type, 0);
1076 if (ext->reverse_endian)
1077 bswap_int32 (&rec_type);
1080 lose ((ME, _("%s: Variable index record (type 4) does not immediately "
1081 "follow value label record (type 3) as it ought."), h->fn));
1084 /* Read number of variables associated with value label from type 4
1086 assertive_bufread (h, &n_vars, sizeof n_vars, 0);
1087 if (ext->reverse_endian)
1088 bswap_int32 (&n_vars);
1089 if (n_vars < 1 || n_vars > ext->dict->nvar)
1090 lose ((ME, _("%s: Number of variables associated with a value label (%d) "
1091 "is not between 1 and the number of variables (%d)."),
1092 h->fn, n_vars, ext->dict->nvar));
1094 /* Allocate storage. */
1095 var = xmalloc (sizeof *var * n_vars);
1097 /* Read the list of variables. */
1098 for (i = 0; i < n_vars; i++)
1103 /* Read variable index, check range. */
1104 assertive_bufread (h, &var_index, sizeof var_index, 0);
1105 if (ext->reverse_endian)
1106 bswap_int32 (&var_index);
1107 if (var_index < 1 || var_index > ext->case_size)
1108 lose ((ME, _("%s: Variable index associated with value label (%d) is "
1109 "not between 1 and the number of values (%d)."),
1110 h->fn, var_index, ext->case_size));
1112 /* Make sure it's a real variable. */
1113 v = var_by_index[var_index - 1];
1115 lose ((ME, _("%s: Variable index associated with value label (%d) refers "
1116 "to a continuation of a string variable, not to an actual "
1117 "variable."), h->fn, var_index));
1118 if (v->type == ALPHA && v->width > MAX_SHORT_STRING)
1119 lose ((ME, _("%s: Value labels are not allowed on long string variables "
1120 "(%s)."), h->fn, v->name));
1122 /* Add it to the list of variables. */
1126 /* Type check the variables. */
1127 for (i = 1; i < n_vars; i++)
1128 if (var[i]->type != var[0]->type)
1129 lose ((ME, _("%s: Variables associated with value label are not all of "
1130 "identical type. Variable %s has %s type, but variable %s has "
1132 var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"),
1133 var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric")));
1135 /* Create a value_label for each value/label tuple, now that we know
1136 the desired type. */
1137 for (i = 0; i < n_labels; i++)
1139 if (var[0]->type == ALPHA)
1141 const int copy_len = min (sizeof (flt64), MAX_SHORT_STRING);
1142 memcpy (cooked_label[i]->v.s, (char *) &raw_label[i], copy_len);
1143 if (MAX_SHORT_STRING > copy_len)
1144 memset (&cooked_label[i]->v.s[copy_len], ' ',
1145 MAX_SHORT_STRING - copy_len);
1147 cooked_label[i]->v.f = raw_label[i];
1148 if (ext->reverse_endian)
1149 bswap_flt64 (&cooked_label[i]->v.f);
1151 cooked_label[i]->ref_count = n_vars;
1154 /* Assign the value_label's to each variable. */
1155 for (i = 0; i < n_vars; i++)
1157 struct variable *v = var[i];
1160 /* Create AVL tree if necessary. */
1162 v->val_lab = avl_create (NULL, val_lab_cmp, (void *) (v->width));
1164 /* Add each label to the variable. */
1165 for (j = 0; j < n_labels; j++)
1167 struct value_label *old = avl_replace (v->val_lab, cooked_label[j]);
1171 if (var[0]->type == NUMERIC)
1172 msg (MW, _("%s: File contains duplicate label for value %g for "
1173 "variable %s."), h->fn, cooked_label[j]->v.f, v->name);
1175 msg (MW, _("%s: File contains duplicate label for value `%.*s' "
1176 "for variable %s."), h->fn, v->width,
1177 cooked_label[j]->v.s, v->name);
1179 free_value_label (old);
1183 free (cooked_label);
1190 for (i = 0; i < n_labels; i++)
1191 if (cooked_label[i])
1193 free (cooked_label[i]->s);
1194 free (cooked_label[i]);
1201 /* Reads NBYTES bytes from the file represented by H. If BUF is
1202 non-NULL, uses that as the buffer; otherwise allocates at least
1203 MINALLOC bytes. Returns a pointer to the buffer on success, NULL
1206 bufread (struct file_handle * h, void *buf, size_t nbytes, size_t minalloc)
1208 struct sfm_fhuser_ext *ext = h->ext;
1211 buf = xmalloc (max (nbytes, minalloc));
1212 if (1 != fread (buf, nbytes, 1, ext->file))
1214 if (ferror (ext->file))
1215 msg (ME, _("%s: Reading system file: %s."), h->fn, strerror (errno));
1217 corrupt_msg (ME, _("%s: Unexpected end of file."), h->fn);
1223 /* Reads a document record, type 6, from system file H, and sets up
1224 the documents and n_documents fields in the associated
1227 read_documents (struct file_handle * h)
1229 struct sfm_fhuser_ext *ext = h->ext;
1230 struct dictionary *dict = ext->dict;
1233 if (dict->documents != NULL)
1234 lose ((ME, _("%s: System file contains multiple type 6 (document) records."),
1237 assertive_bufread (h, &n_lines, sizeof n_lines, 0);
1238 dict->n_documents = n_lines;
1239 if (dict->n_documents <= 0)
1240 lose ((ME, _("%s: Number of document lines (%ld) must be greater than 0."),
1241 h->fn, (long) dict->n_documents));
1243 dict->documents = bufread (h, NULL, 80 * n_lines, 0);
1244 if (dict->documents == NULL)
1252 #if GLOBAL_DEBUGGING
1254 #include "debug-print.h"
1255 /* Displays dictionary DICT on stdout. */
1257 dump_dictionary (struct dictionary * dict)
1261 debug_printf ((_("dictionary:\n")));
1262 for (i = 0; i < dict->nvar; i++)
1265 struct variable *v = dict->var[i];
1268 debug_printf ((" var %s", v->name));
1269 /*debug_printf (("(indices:%d,%d)", v->index, v->foo));*/
1270 debug_printf (("(type:%s,%d)", (v->type == NUMERIC ? _("num")
1271 : (v->type == ALPHA ? _("str") : "!!!")),
1273 debug_printf (("(fv:%d,%d)", v->fv, v->nv));
1274 /*debug_printf (("(get.fv:%d,%d)", v->get.fv, v->get.nv));*/
1275 debug_printf (("(left:%s)(miss:", v->left ? _("left") : _("right")));
1277 switch (v->miss_type)
1281 debug_printf ((_("none")));
1285 debug_printf ((_("one")));
1289 debug_printf ((_("two")));
1293 debug_printf ((_("three")));
1297 debug_printf ((_("range")));
1301 debug_printf ((_("low")));
1305 debug_printf ((_("high")));
1307 case MISSING_RANGE_1:
1309 debug_printf ((_("range+1")));
1313 debug_printf ((_("low+1")));
1315 case MISSING_HIGH_1:
1317 debug_printf ((_("high+1")));
1322 for (j = 0; j < n; j++)
1323 if (v->type == NUMERIC)
1324 debug_printf ((",%g", v->missing[j].f));
1326 debug_printf ((",\"%.*s\"", v->width, v->missing[j].s));
1327 strcpy (print, fmt_to_string (&v->print));
1328 debug_printf ((")(fmt:%s,%s)(lbl:%s)\n",
1329 print, fmt_to_string (&v->write),
1330 v->label ? v->label : "nolabel"));
1337 /* Reads compressed data into H->BUF and sets other pointers
1338 appropriately. Returns nonzero only if both no errors occur and
1341 buffer_input (struct file_handle * h)
1343 struct sfm_fhuser_ext *ext = h->ext;
1346 if (ext->buf == NULL)
1347 ext->buf = xmalloc (sizeof *ext->buf * 128);
1348 amt = fread (ext->buf, sizeof *ext->buf, 128, ext->file);
1349 if (ferror (ext->file))
1351 msg (ME, _("%s: Error reading file: %s."), h->fn, strerror (errno));
1354 ext->ptr = ext->buf;
1355 ext->end = &ext->buf[amt];
1359 /* Reads a single case consisting of compressed data from system file
1360 H into the array TEMP[] according to dictionary DICT, and returns
1361 nonzero only if successful. */
1362 /* Data in system files is compressed in the following manner:
1363 data values are grouped into sets of eight; each of the eight has
1364 one instruction byte, which are output together in an octet; each
1365 byte gives a value for that byte or indicates that the value can be
1366 found following the instructions. */
1368 read_compressed_data (struct file_handle * h, flt64 * temp)
1370 struct sfm_fhuser_ext *ext = h->ext;
1372 const unsigned char *p_end = ext->x + sizeof (flt64);
1373 unsigned char *p = ext->y;
1375 const flt64 *temp_beg = temp;
1376 const flt64 *temp_end = &temp[ext->case_size];
1380 for (; p < p_end; p++)
1384 /* Code 0 is ignored. */
1387 /* Code 252 is end of file. */
1388 if (temp_beg != temp)
1389 lose ((ME, _("%s: Compressed data is corrupted. Data ends "
1390 "partway through a case."), h->fn));
1393 /* Code 253 indicates that the value is stored explicitly
1394 following the instruction bytes. */
1395 if (ext->ptr == NULL || ext->ptr >= ext->end)
1396 if (!buffer_input (h))
1398 lose ((ME, _("%s: Unexpected end of file."), h->fn));
1401 memcpy (temp++, ext->ptr++, sizeof *temp);
1402 if (temp >= temp_end)
1406 /* Code 254 indicates a string that is all blanks. */
1407 memset (temp++, ' ', sizeof *temp);
1408 if (temp >= temp_end)
1412 /* Code 255 indicates the system-missing value. */
1413 *temp = ext->sysmis;
1414 if (ext->reverse_endian)
1417 if (temp >= temp_end)
1421 /* Codes 1 through 251 inclusive are taken to indicate a
1422 value of (BYTE - BIAS), where BYTE is the byte's value
1423 and BIAS is the compression bias (generally 100.0). */
1424 *temp = *p - ext->bias;
1425 if (ext->reverse_endian)
1428 if (temp >= temp_end)
1433 /* We have reached the end of this instruction octet. Read
1435 if (ext->ptr == NULL || ext->ptr >= ext->end)
1436 if (!buffer_input (h))
1438 if (temp_beg != temp)
1439 lose ((ME, _("%s: Unexpected end of file."), h->fn));
1442 memcpy (ext->x, ext->ptr++, sizeof *temp);
1450 /* We have filled up an entire record. Update state and return
1456 /* We have been unsuccessful at filling a record, either through i/o
1457 error or through an end-of-file indication. Update state and
1458 return unsuccessfully. */
1462 /* Reads one case from system file H into the value array PERM
1463 according to the instructions given in associated dictionary DICT,
1464 which must have the get.* elements appropriately set. Returns
1465 nonzero only if successful. */
1467 sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * dict)
1469 struct sfm_fhuser_ext *ext = h->ext;
1476 /* Make sure the caller remembered to finish polishing the
1477 dictionary returned by sfm_read_dictionary(). */
1478 assert (dict->nval > 0);
1480 /* The first concern is to obtain a full case relative to the data
1481 file. (Cases in the data file have no particular relationship to
1482 cases in the active file.) */
1483 nbytes = sizeof *temp * ext->case_size;
1484 temp = local_alloc (nbytes);
1486 if (ext->compressed == 0)
1488 size_t amt = fread (temp, 1, nbytes, ext->file);
1492 if (ferror (ext->file))
1493 msg (ME, _("%s: Reading system file: %s."), h->fn, strerror (errno));
1495 msg (ME, _("%s: Partial record at end of system file."), h->fn);
1499 else if (!read_compressed_data (h, temp))
1502 /* Translate a case in data file format to a case in active file
1504 for (i = 0; i < dict->nvar; i++)
1506 struct variable *v = dict->var[i];
1508 if (v->get.fv == -1)
1511 if (v->type == NUMERIC)
1513 flt64 src = temp[v->get.fv];
1514 if (ext->reverse_endian)
1516 perm[v->fv].f = src == ext->sysmis ? SYSMIS : src;
1519 memcpy (&perm[v->fv].s, &temp[v->get.fv], v->width);
1530 static struct fh_ext_class sfm_r_class =
1533 N_("reading as a system file"),