X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fsfm-read.c;h=fad27c94a8d13537f37c4692550fc5e19f977c51;hb=b9e28aa5614a079548c616bcf97aa804024ad647;hp=99b6151b95a9ff3a5ce6203a3eb14697644d6141;hpb=2e0595dd8e344dbdcab740d7d2a3b67d153d6b39;p=pspp-builds.git diff --git a/src/sfm-read.c b/src/sfm-read.c index 99b6151b..fad27c94 100644 --- a/src/sfm-read.c +++ b/src/sfm-read.c @@ -17,45 +17,27 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* AIX requires this to be the first thing in the file. */ #include -#if __GNUC__ -#define alloca __builtin_alloca -#else -#if HAVE_ALLOCA_H -#include -#else -#ifdef _AIX -#pragma alloca -#else -#ifndef alloca /* predefined by HP cc +Olibcalls */ -char *alloca (); -#endif -#endif -#endif -#endif - -#include +#include "sfm.h" +#include "sfmP.h" +#include "error.h" #include #include #include #include #include "alloc.h" -#include "avl.h" #include "error.h" #include "file-handle.h" #include "filename.h" #include "format.h" #include "getline.h" +#include "hash.h" #include "magic.h" #include "misc.h" -#include "sfm.h" -#include "sfmP.h" +#include "value-labels.h" #include "str.h" #include "var.h" -#undef DEBUGGING -/*#define DEBUGGING 1*/ #include "debug-print.h" /* PORTME: This file may require substantial revision for those @@ -103,70 +85,48 @@ void dump_dictionary (struct dictionary * dict); /* Utilities. */ /* bswap_int32(): Reverse the byte order of 32-bit integer *X. */ -#if __linux__ -#include -#include static inline void -bswap_int32 (int32 * x) +bswap_int32 (int32 *x) { - *x = ntohl (*x); -} -#else /* not Linux */ -static inline void -bswap_int32 (int32 * x) -{ - unsigned char *y = (char *) x; + unsigned char *y = (unsigned char *) x; unsigned char t; + t = y[0]; y[0] = y[3]; y[3] = t; + t = y[1]; y[1] = y[2]; y[2] = t; } -#endif /* not Linux */ /* Reverse the byte order of 64-bit floating point *X. */ static inline void -bswap_flt64 (flt64 * x) +bswap_flt64 (flt64 *x) { - /* Note that under compilers of any quality, half of this function - should optimize out as dead code. */ - unsigned char *y = (char *) x; + unsigned char *y = (unsigned char *) x; + unsigned char t; - if (sizeof (flt64) == 8) - { - unsigned char t; - t = y[0]; - y[0] = y[7]; - y[7] = t; - t = y[1]; - y[1] = y[6]; - y[6] = t; - t = y[2]; - y[2] = y[5]; - y[5] = t; - t = y[3]; - y[3] = y[4]; - y[4] = t; - } - else - { - unsigned char t; - size_t x; + t = y[0]; + y[0] = y[7]; + y[7] = t; - for (x = 0; x < sizeof (flt64) / 2; x++) - { - t = y[x]; - y[x] = y[sizeof (flt64) - x]; - y[sizeof (flt64) - x] = t; - } - } + t = y[1]; + y[1] = y[6]; + y[6] = t; + + t = y[2]; + y[2] = y[5]; + y[5] = t; + + t = y[3]; + y[3] = y[4]; + y[4] = t; } static void corrupt_msg (int class, const char *format,...) - __attribute__ ((format (printf, 2, 3))); + PRINTF_FORMAT (2, 3); /* Displays a corrupt sysfile error. */ static void @@ -196,14 +156,15 @@ corrupt_msg (int class, const char *format,...) /* Closes a system file after we're done with it. */ static void -sfm_close (struct file_handle * h) +sfm_close (struct file_handle *h) { struct sfm_fhuser_ext *ext = h->ext; ext->opened--; assert (ext->opened == 0); - if (EOF == fn_close (h->fn, ext->file)) - msg (ME, _("%s: Closing system file: %s."), h->fn, strerror (errno)); + if (EOF == fn_close (handle_get_filename (h), ext->file)) + msg (ME, _("%s: Closing system file: %s."), + handle_get_filename (h), strerror (errno)); free (ext->buf); free (h->ext); } @@ -277,20 +238,21 @@ sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) else if (h->class != NULL) { msg (ME, _("Cannot read file %s as system file: already opened for %s."), - fh_handle_name (h), h->class->name); + handle_get_name (h), h->class->name); return NULL; } msg (VM (1), _("%s: Opening system-file handle %s for reading."), - fh_handle_filename (h), fh_handle_name (h)); + handle_get_filename (h), handle_get_name (h)); /* Open the physical disk file. */ ext = xmalloc (sizeof (struct sfm_fhuser_ext)); - ext->file = fn_open (h->norm_fn, "rb"); + ext->file = fn_open (handle_get_filename (h), "rb"); if (ext->file == NULL) { msg (ME, _("An error occurred while opening \"%s\" for reading " - "as a system file: %s."), h->fn, strerror (errno)); + "as a system file: %s."), + handle_get_filename (h), strerror (errno)); err_cond_fail (); free (ext); return NULL; @@ -324,15 +286,15 @@ sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) if (wv == NULL) lose ((ME, _("%s: Weighting variable may not be a continuation of " - "a long string variable."), h->fn)); + "a long string variable."), handle_get_filename (h))); else if (wv->type == ALPHA) lose ((ME, _("%s: Weighting variable may not be a string variable."), - h->fn)); + handle_get_filename (h))); - strcpy (ext->dict->weight_var, wv->name); + dict_set_weight (ext->dict, wv); } else - ext->dict->weight_var[0] = 0; + dict_set_weight (ext->dict, NULL); /* Read records of types 3, 4, 6, and 7. */ for (;;) @@ -352,8 +314,9 @@ sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) case 4: lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 " - "records must always immediately follow type 3 records."), - h->fn)); + "records must always immediately follow type 3 " + "records."), + handle_get_filename (h))); case 6: if (!read_documents (h)) @@ -380,13 +343,6 @@ sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) bswap_int32 (&data.count); } - /*if(data.size != sizeof(int32) && data.size != sizeof(flt64)) - lose((ME, "%s: Element size in record type 7, subtype %d, is " - "not either the size of IN (%d) or OBS (%d); actual value " - "is %d.", - h->fn, data.subtype, sizeof(int32), sizeof(flt64), - data.size)); */ - switch (data.subtype) { case 3: @@ -407,7 +363,8 @@ sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) default: msg (MW, _("%s: Unrecognized record type 7, subtype %d " - "encountered in system file."), h->fn, data.subtype); + "encountered in system file."), + handle_get_filename (h), data.subtype); skip = 1; } @@ -430,7 +387,8 @@ sfm_read_dictionary (struct file_handle * h, struct sfm_read_info * inf) } default: - lose ((ME, _("%s: Unrecognized record type %d."), h->fn, rec_type)); + lose ((ME, _("%s: Unrecognized record type %d."), + handle_get_filename (h), rec_type)); } } @@ -449,9 +407,9 @@ lossage: msg (VM (1), _("Error reading system-file header.")); free (var_by_index); - fn_close (h->fn, ext->file); + fn_close (handle_get_filename (h), ext->file); if (ext && ext->dict) - free_dictionary (ext->dict); + dict_destroy (ext->dict); free (ext); h->class = NULL; h->ext = NULL; @@ -471,8 +429,8 @@ read_machine_int32_info (struct file_handle * h, int size, int count) if (size != sizeof (int32) || count != 8) lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " - "subtype 3. Expected size %d, count 8."), - h->fn, size, count, sizeof (int32))); + "subtype 3. Expected size %d, count 8."), + handle_get_filename (h), size, count, sizeof (int32))); assertive_bufread (h, data, sizeof data, 0); if (ext->reverse_endian) @@ -484,7 +442,8 @@ read_machine_int32_info (struct file_handle * h, int size, int count) if (data[4] != 1) lose ((ME, _("%s: Floating-point representation in system file is not " "IEEE-754. PSPP cannot convert between floating-point " - "formats."), h->fn)); + "formats."), + handle_get_filename (h))); #endif /* PORTME: Check recorded file endianness against intuited file @@ -498,16 +457,19 @@ read_machine_int32_info (struct file_handle * h, int size, int count) file_bigendian ^= 1; if (file_bigendian ^ (data[6] == 1)) lose ((ME, _("%s: File-indicated endianness (%s) does not match endianness " - "intuited from file header (%s)."), - h->fn, file_bigendian ? _("big-endian") : _("little-endian"), + "intuited from file header (%s)."), + handle_get_filename (h), + file_bigendian ? _("big-endian") : _("little-endian"), data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian") : _("unknown")))); /* PORTME: Character representation code. */ if (data[7] != 2 && data[7] != 3) lose ((ME, _("%s: File-indicated character representation code (%s) is not " - "ASCII."), h->fn, - data[7] == 1 ? "EBCDIC" : (data[7] == 4 ? _("DEC Kanji") : _("Unknown")))); + "ASCII."), + handle_get_filename (h), + (data[7] == 1 ? "EBCDIC" + : (data[7] == 4 ? _("DEC Kanji") : _("Unknown"))))); return 1; @@ -527,8 +489,8 @@ read_machine_flt64_info (struct file_handle * h, int size, int count) if (size != sizeof (flt64) || count != 3) lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " - "subtype 4. Expected size %d, count 8."), - h->fn, size, count, sizeof (flt64))); + "subtype 4. Expected size %d, count 8."), + handle_get_filename (h), size, count, sizeof (flt64))); assertive_bufread (h, data, sizeof data, 0); if (ext->reverse_endian) @@ -545,7 +507,7 @@ read_machine_flt64_info (struct file_handle * h, int size, int count) "for at least one of the three system values. SYSMIS: " "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: " "%g, %g."), - h->fn, (double) data[0], (double) SYSMIS, + handle_get_filename (h), (double) data[0], (double) SYSMIS, (double) data[1], (double) FLT64_MAX, (double) data[2], (double) second_lowest_flt64); } @@ -563,31 +525,18 @@ read_header (struct file_handle * h, struct sfm_read_info * inf) struct sysfile_header hdr; /* Disk buffer. */ struct dictionary *dict; /* File dictionary. */ char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */ - int skip_amt; /* Amount of product name to omit. */ + int skip_amt = 0; /* Amount of product name to omit. */ int i; /* Create the dictionary. */ - dict = ext->dict = xmalloc (sizeof *dict); - dict->var = NULL; - dict->var_by_name = NULL; - dict->nvar = 0; - dict->N = 0; - dict->nval = -1; /* Unknown. */ - dict->n_splits = 0; - dict->splits = NULL; - dict->weight_var[0] = 0; - dict->weight_index = -1; - dict->filter_var[0] = 0; - dict->label = NULL; - dict->n_documents = 0; - dict->documents = NULL; + dict = ext->dict = dict_create (); /* Read header, check magic. */ assertive_bufread (h, &hdr, sizeof hdr, 0); if (0 != strncmp ("$FL2", hdr.rec_type, 4)) lose ((ME, _("%s: Bad magic. Proper system files begin with " "the four characters `$FL2'. This file will not be read."), - h->fn)); + handle_get_filename (h))); /* Check eye-catcher string. */ memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name); @@ -629,8 +578,8 @@ read_header (struct file_handle * h, struct sfm_read_info * inf) bswap_int32 (&hdr.layout_code); if (hdr.layout_code != 2) lose ((ME, _("%s: File layout code has unexpected value %d. Value " - "should be 2, in big-endian or little-endian format."), - h->fn, hdr.layout_code)); + "should be 2, in big-endian or little-endian format."), + handle_get_filename (h), hdr.layout_code)); ext->reverse_endian = 1; bswap_int32 (&hdr.case_size); @@ -645,39 +594,43 @@ read_header (struct file_handle * h, struct sfm_read_info * inf) if (hdr.case_size <= 0 || ext->case_size > (INT_MAX / (int) sizeof (union value) / 2)) lose ((ME, _("%s: Number of elements per case (%d) is not between 1 " - "and %d."), h->fn, hdr.case_size, INT_MAX / sizeof (union value) / 2)); + "and %d."), + handle_get_filename (h), hdr.case_size, + INT_MAX / sizeof (union value) / 2)); ext->compressed = hdr.compressed; ext->weight_index = hdr.weight_index - 1; if (hdr.weight_index < 0 || hdr.weight_index > hdr.case_size) lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " - "and number of elements per case (%d)."), - h->fn, hdr.weight_index, ext->case_size)); + "and number of elements per case (%d)."), + handle_get_filename (h), hdr.weight_index, ext->case_size)); ext->ncases = hdr.ncases; if (ext->ncases < -1 || ext->ncases > INT_MAX / 2) lose ((ME, _("%s: Number of cases in file (%ld) is not between -1 and " - "%d."), h->fn, (long) ext->ncases, INT_MAX / 2)); + "%d."), handle_get_filename (h), (long) ext->ncases, INT_MAX / 2)); ext->bias = hdr.bias; if (ext->bias != 100.0) corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual " - "value of 100."), h->fn, ext->bias); + "value of 100."), + handle_get_filename (h), ext->bias); /* Make a file label only on the condition that the given label is not all spaces or nulls. */ { int i; - dict->label = NULL; for (i = sizeof hdr.file_label - 1; i >= 0; i--) if (!isspace ((unsigned char) hdr.file_label[i]) && hdr.file_label[i] != 0) { - dict->label = xmalloc (i + 2); - memcpy (dict->label, hdr.file_label, i + 1); - dict->label[i + 1] = 0; + char *label = xmalloc (i + 2); + memcpy (label, hdr.file_label, i + 1); + label[i + 1] = 0; + dict_set_label (dict, label); + free (label); break; } } @@ -715,14 +668,9 @@ lossage: } /* Reads most of the dictionary from file H; also fills in the - associated VAR_BY_INDEX array. - - Note: the dictionary returned by this function has an invalid NVAL - element, also the VAR[] array does not have the FV and LV elements - set, however the NV elements *are* set. This is because the caller - will probably modify the dictionary before reading it in from the - file. Also, the get.* elements are set to appropriate values to - allow the file to be read. */ + associated VAR_BY_INDEX array. The get.* elements in the + created dictionary are set to appropriate values to allow the + file to be read. */ static int read_variables (struct file_handle * h, struct variable *** var_by_index) { @@ -736,7 +684,6 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) int next_value = 0; /* Index to next `value' structure. */ /* Allocate variables. */ - dict->var = xmalloc (sizeof *dict->var * ext->case_size); *var_by_index = xmalloc (sizeof **var_by_index * ext->case_size); /* Read in the entry for each variable and use the info to @@ -744,6 +691,7 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) for (i = 0; i < ext->case_size; i++) { struct variable *vv; + char name[9]; int j; assertive_bufread (h, &sv, sizeof sv, 0); @@ -760,7 +708,8 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) if (sv.rec_type != 2) lose ((ME, _("%s: position %d: Bad record type (%d); " - "the expected value was 2."), h->fn, i, sv.rec_type)); + "the expected value was 2."), + handle_get_filename (h), i, sv.rec_type)); /* If there was a long string previously, make sure that the continuations are present; otherwise make sure there aren't @@ -769,7 +718,8 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) { if (sv.type != -1) lose ((ME, _("%s: position %d: String variable does not have " - "proper number of continuation records."), h->fn, i)); + "proper number of continuation records."), + handle_get_filename (h), i)); (*var_by_index)[i] = NULL; long_string_count--; @@ -777,40 +727,37 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) } else if (sv.type == -1) lose ((ME, _("%s: position %d: Superfluous long string continuation " - "record."), h->fn, i)); + "record."), + handle_get_filename (h), i)); /* Check fields for validity. */ if (sv.type < 0 || sv.type > 255) lose ((ME, _("%s: position %d: Bad variable type code %d."), - h->fn, i, sv.type)); + handle_get_filename (h), i, sv.type)); if (sv.has_var_label != 0 && sv.has_var_label != 1) lose ((ME, _("%s: position %d: Variable label indicator field is not " - "0 or 1."), h->fn, i)); + "0 or 1."), handle_get_filename (h), i)); if (sv.n_missing_values < -3 || sv.n_missing_values > 3 || sv.n_missing_values == -1) lose ((ME, _("%s: position %d: Missing value indicator field is not " - "-3, -2, 0, 1, 2, or 3."), h->fn, i)); - - /* Construct internal variable structure, initialize critical bits. */ - vv = (*var_by_index)[i] = dict->var[dict->nvar++] = xmalloc (sizeof *vv); - vv->index = dict->nvar - 1; - vv->foo = -1; - vv->label = NULL; - vv->val_lab = NULL; + "-3, -2, 0, 1, 2, or 3."), handle_get_filename (h), i)); /* Copy first character of variable name. */ if (!isalpha ((unsigned char) sv.name[0]) && sv.name[0] != '@' && sv.name[0] != '#') lose ((ME, _("%s: position %d: Variable name begins with invalid " - "character."), h->fn, i)); + "character."), + handle_get_filename (h), i)); if (islower ((unsigned char) sv.name[0])) msg (MW, _("%s: position %d: Variable name begins with lowercase letter " - "%c."), h->fn, i, sv.name[0]); + "%c."), + handle_get_filename (h), i, sv.name[0]); if (sv.name[0] == '#') msg (MW, _("%s: position %d: Variable name begins with octothorpe " "(`#'). Scratch variables should not appear in system " - "files."), h->fn, i); - vv->name[0] = toupper ((unsigned char) (sv.name[0])); + "files."), + handle_get_filename (h), i); + name[0] = toupper ((unsigned char) (sv.name[0])); /* Copy remaining characters of variable name. */ for (j = 1; j < 8; j++) @@ -822,39 +769,34 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) else if (islower (c)) { msg (MW, _("%s: position %d: Variable name character %d is " - "lowercase letter %c."), h->fn, i, j + 1, sv.name[j]); - vv->name[j] = toupper ((unsigned char) (c)); + "lowercase letter %c."), + handle_get_filename (h), i, j + 1, sv.name[j]); + name[j] = toupper ((unsigned char) (c)); } else if (isalnum (c) || c == '.' || c == '@' || c == '#' || c == '$' || c == '_') - vv->name[j] = c; + name[j] = c; else lose ((ME, _("%s: position %d: character `\\%03o' (%c) is not valid in a " - "variable name."), h->fn, i, c, c)); - } - vv->name[j] = 0; - - /* Set type, width, and `left' fields and allocate `value' - indices. */ - if (sv.type == 0) - { - vv->type = NUMERIC; - vv->width = 0; - vv->get.nv = 1; - vv->get.fv = next_value++; - vv->nv = 1; + "variable name."), + handle_get_filename (h), i, c, c)); } + name[j] = 0; + + /* Create variable. */ + vv = (*var_by_index)[i] = dict_create_var (dict, name, sv.type); + if (vv == NULL) + lose ((ME, _("%s: Duplicate variable name `%s' within system file."), + handle_get_filename (h), name)); + + /* Case reading data. */ + vv->get.fv = next_value; + if (sv.type == 0) + vv->get.nv = 1; else - { - vv->type = ALPHA; - vv->width = sv.type; - vv->nv = DIV_RND_UP (vv->width, MAX_SHORT_STRING); - vv->get.nv = DIV_RND_UP (vv->width, sizeof (flt64)); - vv->get.fv = next_value; - next_value += vv->get.nv; - long_string_count = vv->get.nv - 1; - } - vv->left = (vv->name[0] == '#'); + vv->get.nv = DIV_RND_UP (sv.type, sizeof (flt64)); + long_string_count = vv->get.nv - 1; + next_value += vv->get.nv; /* Get variable label, if any. */ if (sv.has_var_label == 1) @@ -870,7 +812,8 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) /* Check len. */ if (len < 0 || len > 255) lose ((ME, _("%s: Variable %s indicates variable label of invalid " - "length %d."), h->fn, vv->name, len)); + "length %d."), + handle_get_filename (h), vv->name, len)); /* Read label into variable structure. */ vv->label = bufread (h, NULL, ROUND_UP (len, sizeof (int32)), len + 1); @@ -886,7 +829,8 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) if (vv->width > MAX_SHORT_STRING) lose ((ME, _("%s: Long string variable %s may not have missing " - "values."), h->fn, vv->name)); + "values."), + handle_get_filename (h), vv->name)); assertive_bufread (h, mv, sizeof *mv * abs (sv.n_missing_values), 0); @@ -910,7 +854,8 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) if (vv->type == ALPHA) lose ((ME, _("%s: String variable %s may not have missing " - "values specified as a range."), h->fn, vv->name)); + "values specified as a range."), + handle_get_filename (h), vv->name)); if (mv[0] == ext->lowest) { @@ -947,32 +892,17 @@ read_variables (struct file_handle * h, struct variable *** var_by_index) /* Some consistency checks. */ if (long_string_count != 0) lose ((ME, _("%s: Long string continuation records omitted at end of " - "dictionary."), h->fn)); + "dictionary."), + handle_get_filename (h))); if (next_value != ext->case_size) lose ((ME, _("%s: System file header indicates %d variable positions but " - "%d were read from file."), h->fn, ext->case_size, next_value)); - dict->var = xrealloc (dict->var, sizeof *dict->var * dict->nvar); - - /* Construct AVL tree of dictionary in order to speed up later - processing and to check for duplicate varnames. */ - dict->var_by_name = avl_create (NULL, cmp_variable, NULL); - for (i = 0; i < dict->nvar; i++) - if (NULL != avl_insert (dict->var_by_name, dict->var[i])) - lose ((ME, _("%s: Duplicate variable name `%s' within system file."), - h->fn, dict->var[i]->name)); + "%d were read from file."), + handle_get_filename (h), ext->case_size, next_value)); return 1; lossage: - for (i = 0; i < dict->nvar; i++) - { - free (dict->var[i]->label); - free (dict->var[i]); - } - free (dict->var); - if (dict->var_by_name) - avl_destroy (dict->var_by_name, NULL); - free (dict); + dict_destroy (dict); ext->dict = NULL; return 0; @@ -983,12 +913,10 @@ lossage: static int parse_format_spec (struct file_handle *h, int32 s, struct fmt_spec *v, struct variable *vv) { - if ((size_t) ((s >> 16) & 0xff) - >= sizeof translate_fmt / sizeof *translate_fmt) + v->type = translate_fmt ((s >> 16) & 0xff); + if (v->type == -1) lose ((ME, _("%s: Bad format specifier byte (%d)."), - h->fn, (s >> 16) & 0xff)); - - v->type = translate_fmt[(s >> 16) & 0xff]; + handle_get_filename (h), (s >> 16) & 0xff)); v->w = (s >> 8) & 0xff; v->d = s & 0xff; @@ -996,10 +924,11 @@ parse_format_spec (struct file_handle *h, int32 s, struct fmt_spec *v, struct va if (v->type == -1) lose ((ME, _("%s: Bad format specifier byte (%d)."), - h->fn, (s >> 16) & 0xff)); + handle_get_filename (h), (s >> 16) & 0xff)); if ((vv->type == ALPHA) ^ ((formats[v->type].cat & FCAT_STRING) != 0)) lose ((ME, _("%s: %s variable %s has %s format specifier %s."), - h->fn, vv->type == ALPHA ? _("String") : _("Numeric"), + handle_get_filename (h), + vv->type == ALPHA ? _("String") : _("Numeric"), vv->name, formats[v->type].cat & FCAT_STRING ? _("string") : _("numeric"), formats[v->type].name)); @@ -1016,8 +945,14 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) { struct sfm_fhuser_ext *ext = h->ext; /* File extension record. */ - flt64 *raw_label = NULL; /* Array of raw label values. */ - struct value_label **cooked_label = NULL; /* Array of cooked labels. */ + struct label + { + unsigned char raw_value[8]; /* Value as uninterpreted bytes. */ + union value value; /* Value. */ + char *label; /* Null-terminated label string. */ + }; + + struct label *labels = NULL; int32 n_labels; /* Number of labels. */ struct variable **var = NULL; /* Associated variables. */ @@ -1035,34 +970,28 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) bswap_int32 (&n_labels); /* Allocate memory. */ - raw_label = xmalloc (sizeof *raw_label * n_labels); - cooked_label = xmalloc (sizeof *cooked_label * n_labels); + labels = xmalloc (n_labels * sizeof *labels); for (i = 0; i < n_labels; i++) - cooked_label[i] = NULL; + labels[i].label = NULL; - /* Read each value/label tuple. */ + /* Read each value/label tuple into labels[]. */ for (i = 0; i < n_labels; i++) { - flt64 value; + struct label *label = labels + i; unsigned char label_len; + size_t padded_len; - int rem; + /* Read value. */ + assertive_bufread (h, label->raw_value, sizeof label->raw_value, 0); - /* Read value, label length. */ - assertive_bufread (h, &value, sizeof value, 0); - assertive_bufread (h, &label_len, 1, 0); - memcpy (&raw_label[i], &value, sizeof value); + /* Read label length. */ + assertive_bufread (h, &label_len, sizeof label_len, 0); + padded_len = ROUND_UP (label_len + 1, sizeof (flt64)); - /* Read label. */ - cooked_label[i] = xmalloc (sizeof **cooked_label); - cooked_label[i]->s = xmalloc (label_len + 1); - assertive_bufread (h, cooked_label[i]->s, label_len, 0); - cooked_label[i]->s[label_len] = 0; - - /* Skip padding. */ - rem = REM_RND_UP (label_len + 1, sizeof (flt64)); - if (rem) - assertive_bufread (h, &value, rem, 0); + /* Read label, padding. */ + label->label = xmalloc (padded_len + 1); + assertive_bufread (h, label->label, padded_len - 1, 0); + label->label[label_len] = 0; } /* Second step: Read the type 4 record that has the list of @@ -1078,7 +1007,8 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) if (rec_type != 4) lose ((ME, _("%s: Variable index record (type 4) does not immediately " - "follow value label record (type 3) as it ought."), h->fn)); + "follow value label record (type 3) as it should."), + handle_get_filename (h))); } /* Read number of variables associated with value label from type 4 @@ -1086,15 +1016,13 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) assertive_bufread (h, &n_vars, sizeof n_vars, 0); if (ext->reverse_endian) bswap_int32 (&n_vars); - if (n_vars < 1 || n_vars > ext->dict->nvar) + if (n_vars < 1 || n_vars > dict_get_var_cnt (ext->dict)) lose ((ME, _("%s: Number of variables associated with a value label (%d) " - "is not between 1 and the number of variables (%d)."), - h->fn, n_vars, ext->dict->nvar)); - - /* Allocate storage. */ - var = xmalloc (sizeof *var * n_vars); + "is not between 1 and the number of variables (%d)."), + handle_get_filename (h), n_vars, dict_get_var_cnt (ext->dict))); /* Read the list of variables. */ + var = xmalloc (n_vars * sizeof *var); for (i = 0; i < n_vars; i++) { int32 var_index; @@ -1106,18 +1034,20 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) bswap_int32 (&var_index); if (var_index < 1 || var_index > ext->case_size) lose ((ME, _("%s: Variable index associated with value label (%d) is " - "not between 1 and the number of values (%d)."), - h->fn, var_index, ext->case_size)); + "not between 1 and the number of values (%d)."), + handle_get_filename (h), var_index, ext->case_size)); /* Make sure it's a real variable. */ v = var_by_index[var_index - 1]; if (v == NULL) - lose ((ME, _("%s: Variable index associated with value label (%d) refers " - "to a continuation of a string variable, not to an actual " - "variable."), h->fn, var_index)); + lose ((ME, _("%s: Variable index associated with value label (%d) " + "refers to a continuation of a string variable, not to " + "an actual variable."), + handle_get_filename (h), var_index)); if (v->type == ALPHA && v->width > MAX_SHORT_STRING) - lose ((ME, _("%s: Value labels are not allowed on long string variables " - "(%s)."), h->fn, v->name)); + lose ((ME, _("%s: Value labels are not allowed on long string " + "variables (%s)."), + handle_get_filename (h), v->name)); /* Add it to the list of variables. */ var[i] = v; @@ -1127,73 +1057,69 @@ read_value_labels (struct file_handle * h, struct variable ** var_by_index) for (i = 1; i < n_vars; i++) if (var[i]->type != var[0]->type) lose ((ME, _("%s: Variables associated with value label are not all of " - "identical type. Variable %s has %s type, but variable %s has " - "%s type."), h->fn, + "identical type. Variable %s has %s type, but variable " + "%s has %s type."), + handle_get_filename (h), var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"), var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric"))); - /* Create a value_label for each value/label tuple, now that we know - the desired type. */ - for (i = 0; i < n_labels; i++) + /* Fill in labels[].value, now that we know the desired type. */ + for (i = 0; i < n_labels; i++) { + struct label *label = labels + i; + if (var[0]->type == ALPHA) - { - const int copy_len = min (sizeof (flt64), MAX_SHORT_STRING); - memcpy (cooked_label[i]->v.s, (char *) &raw_label[i], copy_len); - if (MAX_SHORT_STRING > copy_len) - memset (&cooked_label[i]->v.s[copy_len], ' ', - MAX_SHORT_STRING - copy_len); - } else { - cooked_label[i]->v.f = raw_label[i]; - if (ext->reverse_endian) - bswap_flt64 (&cooked_label[i]->v.f); - } - cooked_label[i]->ref_count = n_vars; + { + const int copy_len = min (sizeof (label->raw_value), + sizeof (label->label)); + memcpy (label->value.s, label->raw_value, copy_len); + } else { + flt64 f; + assert (sizeof f == sizeof label->raw_value); + memcpy (&f, label->raw_value, sizeof f); + if (ext->reverse_endian) + bswap_flt64 (&f); + label->value.f = f; + } } - + /* Assign the value_label's to each variable. */ for (i = 0; i < n_vars; i++) { struct variable *v = var[i]; int j; - /* Create AVL tree if necessary. */ - if (!v->val_lab) - v->val_lab = avl_create (NULL, val_lab_cmp, (void *) (v->width)); - /* Add each label to the variable. */ for (j = 0; j < n_labels; j++) { - struct value_label *old = avl_replace (v->val_lab, cooked_label[j]); - if (old == NULL) + struct label *label = labels + j; + if (!val_labs_replace (v->val_labs, label->value, label->label)) continue; if (var[0]->type == NUMERIC) msg (MW, _("%s: File contains duplicate label for value %g for " - "variable %s."), h->fn, cooked_label[j]->v.f, v->name); + "variable %s."), + handle_get_filename (h), label->value.f, v->name); else msg (MW, _("%s: File contains duplicate label for value `%.*s' " - "for variable %s."), h->fn, v->width, - cooked_label[j]->v.s, v->name); - - free_value_label (old); + "for variable %s."), + handle_get_filename (h), v->width, label->value.s, v->name); } } - free (cooked_label); - free (raw_label); + for (i = 0; i < n_labels; i++) + free (labels[i].label); + free (labels); free (var); return 1; lossage: - if (cooked_label) - for (i = 0; i < n_labels; i++) - if (cooked_label[i]) - { - free (cooked_label[i]->s); - free (cooked_label[i]); - } - free (raw_label); + if (labels) + { + for (i = 0; i < n_labels; i++) + free (labels[i].label); + free (labels); + } free (var); return 0; } @@ -1212,9 +1138,11 @@ bufread (struct file_handle * h, void *buf, size_t nbytes, size_t minalloc) if (1 != fread (buf, nbytes, 1, ext->file)) { if (ferror (ext->file)) - msg (ME, _("%s: Reading system file: %s."), h->fn, strerror (errno)); + msg (ME, _("%s: Reading system file: %s."), + handle_get_filename (h), strerror (errno)); else - corrupt_msg (ME, _("%s: Unexpected end of file."), h->fn); + corrupt_msg (ME, _("%s: Unexpected end of file."), + handle_get_filename (h)); return NULL; } return buf; @@ -1229,20 +1157,26 @@ read_documents (struct file_handle * h) struct sfm_fhuser_ext *ext = h->ext; struct dictionary *dict = ext->dict; int32 n_lines; + char *documents; - if (dict->documents != NULL) - lose ((ME, _("%s: System file contains multiple type 6 (document) records."), - h->fn)); + if (dict_get_documents (dict) != NULL) + lose ((ME, _("%s: System file contains multiple " + "type 6 (document) records."), + handle_get_filename (h))); assertive_bufread (h, &n_lines, sizeof n_lines, 0); - dict->n_documents = n_lines; - if (dict->n_documents <= 0) - lose ((ME, _("%s: Number of document lines (%ld) must be greater than 0."), - h->fn, (long) dict->n_documents)); - - dict->documents = bufread (h, NULL, 80 * n_lines, 0); - if (dict->documents == NULL) + if (n_lines <= 0) + lose ((ME, _("%s: Number of document lines (%ld) " + "must be greater than 0."), + handle_get_filename (h), (long) n_lines)); + + documents = bufread (h, NULL, 80 * n_lines, n_lines * 80 + 1); + /* FIXME? Run through asciify. */ + if (documents == NULL) return 0; + documents[80 * n_lines] = '\0'; + dict_set_documents (dict, documents); + free (documents); return 1; lossage: @@ -1250,7 +1184,6 @@ lossage: } #if GLOBAL_DEBUGGING -#define DEBUGGING 1 #include "debug-print.h" /* Displays dictionary DICT on stdout. */ void @@ -1266,12 +1199,10 @@ dump_dictionary (struct dictionary * dict) int n, j; debug_printf ((" var %s", v->name)); - /*debug_printf (("(indices:%d,%d)", v->index, v->foo));*/ debug_printf (("(type:%s,%d)", (v->type == NUMERIC ? _("num") : (v->type == ALPHA ? _("str") : "!!!")), v->width)); debug_printf (("(fv:%d,%d)", v->fv, v->nv)); - /*debug_printf (("(get.fv:%d,%d)", v->get.fv, v->get.nv));*/ debug_printf (("(left:%s)(miss:", v->left ? _("left") : _("right"))); switch (v->miss_type) @@ -1348,7 +1279,8 @@ buffer_input (struct file_handle * h) amt = fread (ext->buf, sizeof *ext->buf, 128, ext->file); if (ferror (ext->file)) { - msg (ME, _("%s: Error reading file: %s."), h->fn, strerror (errno)); + msg (ME, _("%s: Error reading file: %s."), + handle_get_filename (h), strerror (errno)); return 0; } ext->ptr = ext->buf; @@ -1387,7 +1319,8 @@ read_compressed_data (struct file_handle * h, flt64 * temp) /* Code 252 is end of file. */ if (temp_beg != temp) lose ((ME, _("%s: Compressed data is corrupted. Data ends " - "partway through a case."), h->fn)); + "in partial case."), + handle_get_filename (h))); goto lossage; case 253: /* Code 253 indicates that the value is stored explicitly @@ -1395,7 +1328,8 @@ read_compressed_data (struct file_handle * h, flt64 * temp) if (ext->ptr == NULL || ext->ptr >= ext->end) if (!buffer_input (h)) { - lose ((ME, _("%s: Unexpected end of file."), h->fn)); + lose ((ME, _("%s: Unexpected end of file."), + handle_get_filename (h))); goto lossage; } memcpy (temp++, ext->ptr++, sizeof *temp); @@ -1436,7 +1370,8 @@ read_compressed_data (struct file_handle * h, flt64 * temp) if (!buffer_input (h)) { if (temp_beg != temp) - lose ((ME, _("%s: Unexpected end of file."), h->fn)); + lose ((ME, _("%s: Unexpected end of file."), + handle_get_filename (h))); goto lossage; } memcpy (ext->x, ext->ptr++, sizeof *temp); @@ -1473,10 +1408,6 @@ sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * d int i; - /* Make sure the caller remembered to finish polishing the - dictionary returned by sfm_read_dictionary(). */ - assert (dict->nval > 0); - /* The first concern is to obtain a full case relative to the data file. (Cases in the data file have no particular relationship to cases in the active file.) */ @@ -1490,9 +1421,11 @@ sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * d if (amt != nbytes) { if (ferror (ext->file)) - msg (ME, _("%s: Reading system file: %s."), h->fn, strerror (errno)); + msg (ME, _("%s: Reading system file: %s."), + handle_get_filename (h), strerror (errno)); else if (amt != 0) - msg (ME, _("%s: Partial record at end of system file."), h->fn); + msg (ME, _("%s: Partial record at end of system file."), + handle_get_filename (h)); goto lossage; } } @@ -1501,9 +1434,9 @@ sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * d /* Translate a case in data file format to a case in active file format. */ - for (i = 0; i < dict->nvar; i++) + for (i = 0; i < dict_get_var_cnt (dict); i++) { - struct variable *v = dict->var[i]; + struct variable *v = dict_get_var (dict, i); if (v->get.fv == -1) continue;