X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=b5fa621220db6e1358cfbac663cb6e2d546ab353;hb=b64685d06f8db1aff292ec409abe25f8a483d775;hp=58a885ae4a4be3e4b97f5ab2e6bd800fe395951c;hpb=77e551d23575da6b89f866612ab39c2b0497c9be;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 58a885ae..b5fa6212 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -30,59 +31,65 @@ #include #include #include +#include +#include #include "sys-file-reader.h" #include "sfm-private.h" #include "case.h" #include "dictionary.h" #include "file-handle-def.h" -#include "filename.h" +#include "file-name.h" #include "format.h" #include "value-labels.h" #include "variable.h" +#include "value.h" #include "gettext.h" #define _(msgid) gettext (msgid) /* System file reader. */ struct sfm_reader - { - struct file_handle *fh; /* File handle. */ - FILE *file; /* File stream. */ - - int reverse_endian; /* 1=file has endianness opposite us. */ - int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */ - int value_cnt; /* Number of `union values's per case. */ - long case_cnt; /* Number of cases, -1 if unknown. */ - int compressed; /* 1=compressed, 0=not compressed. */ - double bias; /* Compression bias, usually 100.0. */ - int weight_idx; /* 0-based index of weighting variable, or -1. */ - bool ok; /* False after an I/O error or corrupt data. */ - - /* Variables. */ - struct sfm_var *vars; /* Variables. */ - - /* File's special constants. */ - flt64 sysmis; - flt64 highest; - flt64 lowest; - - /* Decompression buffer. */ - flt64 *buf; /* Buffer data. */ - flt64 *ptr; /* Current location in buffer. */ - flt64 *end; /* End of buffer data. */ - - /* Compression instruction octet. */ - unsigned char x[8]; /* Current instruction octet. */ - unsigned char *y; /* Location in current instruction octet. */ - }; +{ + struct file_handle *fh; /* File handle. */ + FILE *file; /* File stream. */ + + int reverse_endian; /* 1=file has endianness opposite us. */ + int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */ + int value_cnt; /* Number of `union values's per case. */ + long case_cnt; /* Number of cases, -1 if unknown. */ + int compressed; /* 1=compressed, 0=not compressed. */ + double bias; /* Compression bias, usually 100.0. */ + int weight_idx; /* 0-based index of weighting variable, or -1. */ + bool ok; /* False after an I/O error or corrupt data. */ + bool has_vls; /* True if the file has one or more Very Long Strings*/ + + /* Variables. */ + struct hsh_table *var_hash; + struct variable **svars; + + /* File's special constants. */ + flt64 sysmis; + flt64 highest; + flt64 lowest; + + /* Decompression buffer. */ + flt64 *buf; /* Buffer data. */ + flt64 *ptr; /* Current location in buffer. */ + flt64 *end; /* End of buffer data. */ + + /* Compression instruction octet. */ + unsigned char x[8]; /* Current instruction octet. */ + unsigned char *y; /* Location in current instruction octet. */ +}; /* A variable in a system file. */ struct sfm_var - { - int width; /* 0=numeric, otherwise string width. */ - int fv; /* Index into case. */ - }; +{ + char name[SHORT_NAME_LEN + 1]; /* name */ + int width; /* 0=numeric, otherwise string width. */ + int fv; /* Index into case. */ +}; /* Utilities. */ @@ -119,21 +126,26 @@ static void corrupt_msg (int class, const char *format,...) PRINTF_FORMAT (2, 3); -/* Displays a corrupt sysfile error. */ -static void -corrupt_msg (int class, const char *format,...) + /* Displays a corrupt sysfile error. */ + static void + corrupt_msg (int class, const char *format,...) { - struct error e; + struct msg m; va_list args; + struct string text; - e.class = class; - e.where.filename = NULL; - e.where.line_number = 0; - e.title = _("corrupt system file: "); - + ds_create (&text, _("corrupt system file: ")); va_start (args, format); - err_vmsg (&e, format, args); + ds_vprintf (&text, format, args); va_end (args); + + m.category = msg_class_to_category (class); + m.severity = msg_class_to_severity (class); + m.where.file_name = NULL; + m.where.line_number = 0; + m.text = ds_c_str (&text); + + msg_emit (&m); } /* Closes a system file after we're done with it. */ @@ -145,16 +157,16 @@ sfm_close_reader (struct sfm_reader *r) if (r->file) { - if (fn_close (fh_get_filename (r->fh), r->file) == EOF) + if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) msg (ME, _("%s: Closing system file: %s."), - fh_get_filename (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (errno)); r->file = NULL; } if (r->fh != NULL) fh_close (r->fh, "system file", "rs"); - - free (r->vars); + + hsh_destroy(r->var_hash); free (r->buf); free (r); } @@ -196,6 +208,123 @@ static int fread_ok (struct sfm_reader *, void *, size_t); goto error; \ } while (0) + +struct name_pair +{ + char *shortname; + char *longname; +}; + +static int +pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED) +{ + int i; + + const struct name_pair *p1 = _p1; + const struct name_pair *p2 = _p2; + + char buf1[SHORT_NAME_LEN + 1]; + char buf2[SHORT_NAME_LEN + 1]; + + memset(buf1, 0, SHORT_NAME_LEN + 1); + memset(buf2, 0, SHORT_NAME_LEN + 1); + + for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) + { + buf1[i] = p1->shortname[i]; + if ( '\0' == buf1[i]) + break; + } + + for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) + { + buf2[i] = p2->shortname[i]; + if ( '\0' == buf2[i]) + break; + } + + return strncmp(buf1, buf2, SHORT_NAME_LEN); +} + +static unsigned int +pair_sn_hash(const void *_p, void *aux UNUSED) +{ + int i; + const struct name_pair *p = _p; + char buf[SHORT_NAME_LEN + 1]; + + memset(buf, 0, SHORT_NAME_LEN + 1); + for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) + { + buf[i] = p->shortname[i]; + if ( '\0' == buf[i]) + break; + } + + return hsh_hash_bytes(buf, strlen(buf)); +} + +static void +pair_sn_free(void *p, void *aux UNUSED) +{ + free(p); +} + + + +/* A hsh_compare_func that orders variables A and B by their + names. */ +static int +compare_var_shortnames (const void *a_, const void *b_, void *foo UNUSED) +{ + int i; + const struct variable *a = a_; + const struct variable *b = b_; + + char buf1[SHORT_NAME_LEN + 1]; + char buf2[SHORT_NAME_LEN + 1]; + + memset(buf1, 0, SHORT_NAME_LEN + 1); + memset(buf2, 0, SHORT_NAME_LEN + 1); + + for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) + { + buf1[i] = a->short_name[i]; + if ( '\0' == buf1[i]) + break; + } + + for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) + { + buf2[i] = b->short_name[i]; + if ( '\0' == buf2[i]) + break; + } + + return strncmp(buf1, buf2, SHORT_NAME_LEN); +} + +/* A hsh_hash_func that hashes variable V based on its name. */ +static unsigned +hash_var_shortname (const void *v_, void *foo UNUSED) +{ + int i; + const struct variable *v = v_; + char buf[SHORT_NAME_LEN + 1]; + + memset(buf, 0, SHORT_NAME_LEN + 1); + for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) + { + buf[i] = v->short_name[i]; + if ( '\0' == buf[i]) + break; + } + + return hsh_hash_bytes(buf, strlen(buf)); +} + + + /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. If INFO is non-null, then it receives additional info about the @@ -207,6 +336,13 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct sfm_reader *r = NULL; struct variable **var_by_idx = NULL; + /* The data in record 7(14) */ + char *subrec14data = 0; + + /* A hash table of long variable names indexed by short name */ + struct hsh_table *short_to_long = NULL; + + *dict = dict_create (); if (!fh_open (fh, FH_REF_FILE, "system file", "rs")) goto error; @@ -214,7 +350,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, /* Create and initialize reader. */ r = xmalloc (sizeof *r); r->fh = fh; - r->file = fn_open (fh_get_filename (fh), "rb"); + r->file = fn_open (fh_get_file_name (fh), "rb"); r->reverse_endian = 0; r->fix_specials = 0; @@ -224,8 +360,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->bias = 100.0; r->weight_idx = -1; r->ok = true; + r->has_vls = false; + r->svars = 0; - r->vars = NULL; + r->var_hash = hsh_create(4, compare_var_shortnames, hash_var_shortname, 0, 0); r->sysmis = -FLT64_MAX; r->highest = FLT64_MAX; @@ -239,7 +377,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { msg (ME, _("An error occurred while opening \"%s\" for reading " "as a system file: %s."), - fh_get_filename (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (errno)); goto error; } @@ -256,7 +394,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, if (r->weight_idx < 0 || r->weight_idx >= r->value_cnt) lose ((ME, _("%s: Index of weighting variable (%d) is not between 0 " "and number of elements per case (%d)."), - fh_get_filename (r->fh), r->weight_idx, r->value_cnt)); + fh_get_file_name (r->fh), r->weight_idx, r->value_cnt)); weight_var = var_by_idx[r->weight_idx]; @@ -264,10 +402,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, if (weight_var == NULL) lose ((ME, _("%s: Weighting variable may not be a continuation of " - "a long string variable."), fh_get_filename (fh))); + "a long string variable."), fh_get_file_name (fh))); else if (weight_var->type == ALPHA) lose ((ME, _("%s: Weighting variable may not be a string variable."), - fh_get_filename (fh))); + fh_get_file_name (fh))); dict_set_weight (*dict, weight_var); } @@ -283,6 +421,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, if (r->reverse_endian) bswap_int32 (&rec_type); + switch (rec_type) { case 3: @@ -294,7 +433,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, lose ((ME, _("%s: Orphaned variable index record (type 4). Type 4 " "records must always immediately follow type 3 " "records."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); case 6: if (!read_documents (r, *dict)) @@ -304,11 +443,11 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, case 7: { struct - { - int32_t subtype P; - int32_t size P; - int32_t count P; - } + { + int32_t subtype P; + int32_t size P; + int32_t count P; + } data; unsigned long bytes; @@ -322,9 +461,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, bswap_int32 (&data.count); } bytes = data.size * data.count; + if (bytes < data.size || bytes < data.count) lose ((ME, "%s: Record type %d subtype %d too large.", - fh_get_filename (r->fh), rec_type, data.subtype)); + fh_get_file_name (r->fh), rec_type, data.subtype)); switch (data.subtype) { @@ -347,12 +487,13 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { const int n_vars = data.count / 3 ; int i; - if ( data.count % 3 || n_vars > dict_get_var_cnt(*dict) ) + if ( data.count % 3 || n_vars != dict_get_var_cnt(*dict) ) { msg (MW, _("%s: Invalid subrecord length. " "Record: 7; Subrecord: 11"), - fh_get_filename (r->fh)); + fh_get_file_name (r->fh)); skip = 1; + break; } for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i ) @@ -369,6 +510,16 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, assertive_buf_read (r, ¶ms, sizeof(params), 0); + if ( ! measure_is_valid(params.measure) + || + ! alignment_is_valid(params.align)) + { + msg(MW, + _("Invalid variable display parameters. Default parameters substituted."), + fh_get_file_name(r->fh)); + continue; + } + v = dict_get_var(*dict, i); v->measure = params.measure; @@ -380,23 +531,31 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, case 13: /* SPSS 12.0 Long variable name map */ { - char *buf, *short_name, *save_ptr; + char *short_name, *save_ptr; int idx; + r->has_vls = true; + /* Read data. */ - buf = xmalloc (bytes + 1); - if (!buf_read (r, buf, bytes, 0)) + subrec14data = xmalloc (bytes + 1); + if (!buf_read (r, subrec14data, bytes, 0)) { - free (buf); goto error; } - buf[bytes] = '\0'; + subrec14data[bytes] = '\0'; + + short_to_long = hsh_create(4, + pair_sn_compare, + pair_sn_hash, + pair_sn_free, + 0); /* Parse data. */ - for (short_name = strtok_r (buf, "=", &save_ptr), idx = 0; + for (short_name = strtok_r (subrec14data, "=", &save_ptr), idx = 0; short_name != NULL; short_name = strtok_r (NULL, "=", &save_ptr), idx++) { + struct name_pair *pair ; char *long_name = strtok_r (NULL, "\t", &save_ptr); struct variable *v; @@ -405,14 +564,14 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { msg (MW, _("%s: Trailing garbage in long variable " "name map."), - fh_get_filename (r->fh)); + fh_get_file_name (r->fh)); break; } if (!var_is_valid_name (long_name, false)) { msg (MW, _("%s: Long variable mapping to invalid " "variable name `%s'."), - fh_get_filename (r->fh), long_name); + fh_get_file_name (r->fh), long_name); break; } @@ -422,16 +581,16 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { msg (MW, _("%s: Long variable mapping for " "nonexistent variable %s."), - fh_get_filename (r->fh), short_name); + fh_get_file_name (r->fh), short_name); break; } /* Identify any duplicates. */ if ( compare_var_names(short_name, long_name, 0) && - NULL != dict_lookup_var (*dict, long_name)) + NULL != dict_lookup_var (*dict, long_name)) lose ((ME, _("%s: Duplicate long variable name `%s' " "within system file."), - fh_get_filename (r->fh), long_name)); + fh_get_file_name (r->fh), long_name)); /* Set long name. @@ -441,6 +600,17 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, dict_rename_var (*dict, v, long_name); var_set_short_name (v, short_name); + pair = xmalloc(sizeof *pair); + pair->shortname = short_name; + pair->longname = long_name; + hsh_insert(short_to_long, pair); +#if 0 + /* This messes up the processing of subtype 14 (below). + I'm not sure if it is needed anyway, so I'm removing it for + now. If it's needed, then it will need to be done after all the + records have been processed. --- JMD 27 April 2006 + */ + /* For compatability, make sure dictionary is in long variable name map order. In the common case, this has no effect, @@ -448,17 +618,128 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, variable name map are already in the same order. */ dict_reorder_var (*dict, v, idx); +#endif } + + } + break; + + case 14: + { + int j = 0; + bool eq_seen = false; + int i; - /* Free data. */ - free (buf); + /* Read data. */ + char *buffer = xmalloc (bytes + 1); + if (!buf_read (r, buffer, bytes, 0)) + { + free (buffer); + goto error; + } + buffer[bytes] = '\0'; + + + /* Note: SPSS v13 terminates this record with 00, + whereas SPSS v14 terminates it with 00 09. We must + accept either */ + for(i = 0; i < bytes ; ++i) + { + long int length; + static char name[SHORT_NAME_LEN + 1] = {0}; + static char len_str[6] ={0}; + + switch( buffer[i] ) + { + case '=': + eq_seen = true; + j = 0; + break; + case '\0': + length = strtol(len_str, 0, 10); + if ( length != LONG_MAX && length != LONG_MIN) + { + char *lookup_name = name; + int l; + int idx; + struct variable *v; + + if ( short_to_long ) + { + struct name_pair pair; + struct name_pair *p; + + pair.shortname = name; + p = hsh_find(short_to_long, &pair); + if ( p ) + lookup_name = p->longname; + } + + v = dict_lookup_var(*dict, lookup_name); + if ( !v ) + { + corrupt_msg(MW, + _("%s: No variable called %s but it is listed in length table."), + fh_get_file_name (r->fh), lookup_name); + + goto error; + + } + + l = length; + if ( v->width > EFFECTIVE_LONG_STRING_LENGTH ) + l -= EFFECTIVE_LONG_STRING_LENGTH; + else + l -= v->width; + + idx = v->index; + while ( l > 0 ) + { + struct variable *v_next; + v_next = dict_get_var(*dict, idx + 1); + + if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH ) + l -= EFFECTIVE_LONG_STRING_LENGTH; + else + l -= v_next->width; + + hsh_delete(r->var_hash, v_next); + + dict_delete_var(*dict, v_next); + } + + assert ( length > MAX_LONG_STRING ); + + v->width = length; + v->print.w = v->width; + v->write.w = v->width; + v->nv = DIV_RND_UP (length, MAX_SHORT_STRING); + } + eq_seen = false; + memset(name, 0, SHORT_NAME_LEN+1); + memset(len_str, 0, 6); + j = 0; + break; + case '\t': + break; + default: + if ( eq_seen ) + len_str[j] = buffer[i]; + else + name[j] = buffer[i]; + j++; + break; + } + } + free(buffer); + dict_compact_values(*dict); } break; default: msg (MW, _("%s: Unrecognized record type 7, subtype %d " "encountered in system file."), - fh_get_filename (r->fh), data.subtype); + fh_get_file_name (r->fh), data.subtype); skip = 1; } @@ -477,24 +758,30 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, int32_t filler; assertive_buf_read (r, &filler, sizeof filler, 0); + goto success; } default: corrupt_msg(MW, _("%s: Unrecognized record type %d."), - fh_get_filename (r->fh), rec_type); + fh_get_file_name (r->fh), rec_type); } } -success: + success: /* Come here on successful completion. */ + free (var_by_idx); + hsh_destroy(short_to_long); + free (subrec14data); return r; -error: + error: /* Come here on unsuccessful completion. */ sfm_close_reader (r); free (var_by_idx); + hsh_destroy(short_to_long); + free (subrec14data); if (*dict != NULL) { dict_destroy (*dict); @@ -515,7 +802,7 @@ read_machine_int32_info (struct sfm_reader *r, int size, int count) if (size != sizeof (int32_t) || count != 8) lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " "subtype 3. Expected size %d, count 8."), - fh_get_filename (r->fh), size, count, sizeof (int32_t))); + fh_get_file_name (r->fh), size, count, sizeof (int32_t))); assertive_buf_read (r, data, sizeof data, 0); if (r->reverse_endian) @@ -527,7 +814,7 @@ read_machine_int32_info (struct sfm_reader *r, int size, int count) lose ((ME, _("%s: Floating-point representation in system file is not " "IEEE-754. PSPP cannot convert between floating-point " "formats."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); #else #error Add support for your floating-point format. #endif @@ -542,22 +829,22 @@ read_machine_int32_info (struct sfm_reader *r, int size, int count) if (file_bigendian ^ (data[6] == 1)) lose ((ME, _("%s: File-indicated endianness (%s) does not match " "endianness intuited from file header (%s)."), - fh_get_filename (r->fh), + fh_get_file_name (r->fh), file_bigendian ? _("big-endian") : _("little-endian"), data[6] == 1 ? _("big-endian") : (data[6] == 2 ? _("little-endian") - : _("unknown")))); + : _("unknown")))); /* PORTME: Character representation code. */ if (data[7] != 2 && data[7] != 3) lose ((ME, _("%s: File-indicated character representation code (%s) is " "not ASCII."), - fh_get_filename (r->fh), + fh_get_file_name (r->fh), (data[7] == 1 ? "EBCDIC" : (data[7] == 4 ? _("DEC Kanji") : _("Unknown"))))); return 1; -error: + error: return 0; } @@ -571,7 +858,7 @@ read_machine_flt64_info (struct sfm_reader *r, int size, int count) if (size != sizeof (flt64) || count != 3) lose ((ME, _("%s: Bad size (%d) or count (%d) field on record type 7, " "subtype 4. Expected size %d, count 8."), - fh_get_filename (r->fh), size, count, sizeof (flt64))); + fh_get_file_name (r->fh), size, count, sizeof (flt64))); assertive_buf_read (r, data, sizeof data, 0); if (r->reverse_endian) @@ -588,14 +875,14 @@ read_machine_flt64_info (struct sfm_reader *r, int size, int count) "for at least one of the three system values. SYSMIS: " "indicated %g, expected %g; HIGHEST: %g, %g; LOWEST: " "%g, %g."), - fh_get_filename (r->fh), (double) data[0], (double) SYSMIS, + fh_get_file_name (r->fh), (double) data[0], (double) SYSMIS, (double) data[1], (double) FLT64_MAX, (double) data[2], (double) second_lowest_flt64); } return 1; -error: + error: return 0; } @@ -613,7 +900,7 @@ read_header (struct sfm_reader *r, if (strncmp ("$FL2", hdr.rec_type, 4) != 0) lose ((ME, _("%s: Bad magic. Proper system files begin with " "the four characters `$FL2'. This file will not be read."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); /* Check eye-category.her string. */ memcpy (prod_name, hdr.prod_name, sizeof hdr.prod_name); @@ -655,10 +942,10 @@ read_header (struct sfm_reader *r, if (hdr.layout_code != 2) lose ((ME, _("%s: File layout code has unexpected value %d. Value " "should be 2, in big-endian or little-endian format."), - fh_get_filename (r->fh), hdr.layout_code)); + fh_get_file_name (r->fh), hdr.layout_code)); r->reverse_endian = 1; - bswap_int32 (&hdr.case_size); + bswap_int32 (&hdr.nominal_case_size); bswap_int32 (&hdr.compress); bswap_int32 (&hdr.weight_idx); bswap_int32 (&hdr.case_cnt); @@ -667,9 +954,10 @@ read_header (struct sfm_reader *r, /* Copy basic info and verify correctness. */ - r->value_cnt = hdr.case_size; + r->value_cnt = hdr.nominal_case_size; - /* If value count is rediculous, then force it to -1 (a sentinel value) */ + /* If value count is ridiculous, then force it to -1 (a + sentinel value). */ if ( r->value_cnt < 0 || r->value_cnt > (INT_MAX / (int) sizeof (union value) / 2)) r->value_cnt = -1; @@ -682,13 +970,13 @@ read_header (struct sfm_reader *r, if (r->case_cnt < -1 || r->case_cnt > INT_MAX / 2) lose ((ME, _("%s: Number of cases in file (%ld) is not between -1 and %d."), - fh_get_filename (r->fh), (long) r->case_cnt, INT_MAX / 2)); + fh_get_file_name (r->fh), (long) r->case_cnt, INT_MAX / 2)); r->bias = hdr.bias; if (r->bias != 100.0) corrupt_msg (MW, _("%s: Compression bias (%g) is not the usual " "value of 100."), - fh_get_filename (r->fh), r->bias); + fh_get_file_name (r->fh), r->bias); /* Make a file label only on the condition that the given label is not all spaces or nulls. */ @@ -738,7 +1026,7 @@ read_header (struct sfm_reader *r, return 1; -error: + error: return 0; } @@ -759,13 +1047,6 @@ read_variables (struct sfm_reader *r, *var_by_idx = 0; - /* Pre-allocate variables. */ - if (r->value_cnt != -1) - { - *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx); - r->vars = xnmalloc (r->value_cnt, sizeof *r->vars); - } - /* Read in the entry for each variable and use the info to initialize the dictionary. */ @@ -776,9 +1057,6 @@ read_variables (struct sfm_reader *r, int nv; int j; - if ( r->value_cnt != -1 && i >= r->value_cnt ) - break; - assertive_buf_read (r, &sv, sizeof sv, 0); if (r->reverse_endian) @@ -799,11 +1077,7 @@ read_variables (struct sfm_reader *r, break; } - if ( -1 == r->value_cnt ) - { - *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx); - r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars); - } + *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx); /* If there was a long string previously, make sure that the continuations are present; otherwise make sure there aren't @@ -813,10 +1087,9 @@ read_variables (struct sfm_reader *r, if (sv.type != -1) lose ((ME, _("%s: position %d: String variable does not have " "proper number of continuation records."), - fh_get_filename (r->fh), i)); + fh_get_file_name (r->fh), i)); - r->vars[i].width = -1; (*var_by_idx)[i] = NULL; long_string_count--; continue; @@ -824,25 +1097,25 @@ read_variables (struct sfm_reader *r, else if (sv.type == -1) lose ((ME, _("%s: position %d: Superfluous long string continuation " "record."), - fh_get_filename (r->fh), i)); + fh_get_file_name (r->fh), i)); /* Check fields for validity. */ if (sv.type < 0 || sv.type > 255) lose ((ME, _("%s: position %d: Bad variable type code %d."), - fh_get_filename (r->fh), i, sv.type)); + fh_get_file_name (r->fh), i, sv.type)); if (sv.has_var_label != 0 && sv.has_var_label != 1) lose ((ME, _("%s: position %d: Variable label indicator field is not " - "0 or 1."), fh_get_filename (r->fh), i)); + "0 or 1."), fh_get_file_name (r->fh), i)); if (sv.n_missing_values < -3 || sv.n_missing_values > 3 || sv.n_missing_values == -1) lose ((ME, _("%s: position %d: Missing value indicator field is not " - "-3, -2, 0, 1, 2, or 3."), fh_get_filename (r->fh), i)); + "-3, -2, 0, 1, 2, or 3."), fh_get_file_name (r->fh), i)); /* Copy first character of variable name. */ if (sv.name[0] == '@' || sv.name[0] == '#') lose ((ME, _("%s: position %d: Variable name begins with invalid " "character."), - fh_get_filename (r->fh), i)); + fh_get_file_name (r->fh), i)); name[0] = sv.name[0]; @@ -860,14 +1133,15 @@ read_variables (struct sfm_reader *r, if ( ! var_is_plausible_name(name, false) ) lose ((ME, _("%s: Invalid variable name `%s' within system file."), - fh_get_filename (r->fh), name)); + fh_get_file_name (r->fh), name)); /* Create variable. */ vv = (*var_by_idx)[i] = dict_create_var (dict, name, sv.type); if (vv == NULL) lose ((ME, _("%s: Duplicate variable name `%s' within system file."), - fh_get_filename (r->fh), name)); + fh_get_file_name (r->fh), name)); + /* Set the short name the same as the long name */ var_set_short_name (vv, vv->name); /* Case reading data. */ @@ -890,7 +1164,7 @@ read_variables (struct sfm_reader *r, if (len < 0 || len > 255) lose ((ME, _("%s: Variable %s indicates variable label of invalid " "length %d."), - fh_get_filename (r->fh), vv->name, len)); + fh_get_file_name (r->fh), vv->name, len)); if ( len != 0 ) { @@ -911,7 +1185,7 @@ read_variables (struct sfm_reader *r, if (vv->width > MAX_SHORT_STRING) lose ((ME, _("%s: Long string variable %s may not have missing " "values."), - fh_get_filename (r->fh), vv->name)); + fh_get_file_name (r->fh), vv->name)); assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0); @@ -932,7 +1206,7 @@ read_variables (struct sfm_reader *r, if (vv->type == ALPHA) lose ((ME, _("%s: String variable %s may not have missing " "values specified as a range."), - fh_get_filename (r->fh), vv->name)); + fh_get_file_name (r->fh), vv->name)); if (mv[0] == r->lowest) mv_add_num_range (&vv->miss, LOWEST, mv[1]); @@ -950,26 +1224,25 @@ read_variables (struct sfm_reader *r, || !parse_format_spec (r, sv.write, &vv->write, vv)) goto error; - r->vars[i].width = vv->width; - r->vars[i].fv = vv->fv; - + if ( vv->width != -1) + hsh_insert(r->var_hash, vv); } /* Some consistency checks. */ if (long_string_count != 0) lose ((ME, _("%s: Long string continuation records omitted at end of " "dictionary."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); if (next_value != r->value_cnt) corrupt_msg(MW, _("%s: System file header indicates %d variable positions but " - "%d were read from file."), - fh_get_filename (r->fh), r->value_cnt, next_value); + "%d were read from file."), + fh_get_file_name (r->fh), r->value_cnt, next_value); return 1; -error: + error: return 0; } @@ -982,13 +1255,13 @@ parse_format_spec (struct sfm_reader *r, int32_t s, f->type = translate_fmt ((s >> 16) & 0xff); if (f->type == -1) lose ((ME, _("%s: Bad format specifier byte (%d)."), - fh_get_filename (r->fh), (s >> 16) & 0xff)); + fh_get_file_name (r->fh), (s >> 16) & 0xff)); f->w = (s >> 8) & 0xff; f->d = s & 0xff; if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0)) lose ((ME, _("%s: %s variable %s has %s format specifier %s."), - fh_get_filename (r->fh), + fh_get_file_name (r->fh), v->type == ALPHA ? _("String") : _("Numeric"), v->name, formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"), @@ -1004,7 +1277,7 @@ parse_format_spec (struct sfm_reader *r, int32_t s, } return 1; -error: + error: return 0; } @@ -1015,11 +1288,11 @@ read_value_labels (struct sfm_reader *r, struct dictionary *dict, struct variable **var_by_idx) { struct label - { - char raw_value[8]; /* Value as uninterpreted bytes. */ - union value value; /* Value. */ - char *label; /* Null-terminated label string. */ - }; + { + char raw_value[8]; /* Value as uninterpreted bytes. */ + union value value; /* Value. */ + char *label; /* Null-terminated label string. */ + }; struct label *labels = NULL; int32_t n_labels; /* Number of labels. */ @@ -1041,7 +1314,7 @@ read_value_labels (struct sfm_reader *r, if ( n_labels >= ((int32_t) ~0) / sizeof *labels) { corrupt_msg(MW, _("%s: Invalid number of labels: %d. Ignoring labels."), - fh_get_filename (r->fh), n_labels); + fh_get_file_name (r->fh), n_labels); n_labels = 0; } @@ -1084,7 +1357,7 @@ read_value_labels (struct sfm_reader *r, if (rec_type != 4) lose ((ME, _("%s: Variable index record (type 4) does not immediately " "follow value label record (type 3) as it should."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); } /* Read number of variables associated with value label from type 4 @@ -1095,7 +1368,7 @@ read_value_labels (struct sfm_reader *r, if (n_vars < 1 || n_vars > dict_get_var_cnt (dict)) lose ((ME, _("%s: Number of variables associated with a value label (%d) " "is not between 1 and the number of variables (%d)."), - fh_get_filename (r->fh), n_vars, dict_get_var_cnt (dict))); + fh_get_file_name (r->fh), n_vars, dict_get_var_cnt (dict))); /* Read the list of variables. */ var = xnmalloc (n_vars, sizeof *var); @@ -1111,7 +1384,7 @@ read_value_labels (struct sfm_reader *r, if (var_idx < 1 || var_idx > r->value_cnt) lose ((ME, _("%s: Variable index associated with value label (%d) is " "not between 1 and the number of values (%d)."), - fh_get_filename (r->fh), var_idx, r->value_cnt)); + fh_get_file_name (r->fh), var_idx, r->value_cnt)); /* Make sure it's a real variable. */ v = var_by_idx[var_idx - 1]; @@ -1119,11 +1392,11 @@ read_value_labels (struct sfm_reader *r, lose ((ME, _("%s: Variable index associated with value label (%d) " "refers to a continuation of a string variable, not to " "an actual variable."), - fh_get_filename (r->fh), var_idx)); + fh_get_file_name (r->fh), var_idx)); if (v->type == ALPHA && v->width > MAX_SHORT_STRING) lose ((ME, _("%s: Value labels are not allowed on long string " "variables (%s)."), - fh_get_filename (r->fh), v->name)); + fh_get_file_name (r->fh), v->name)); /* Add it to the list of variables. */ var[i] = v; @@ -1135,7 +1408,7 @@ read_value_labels (struct sfm_reader *r, lose ((ME, _("%s: Variables associated with value label are not all of " "identical type. Variable %s has %s type, but variable " "%s has %s type."), - fh_get_filename (r->fh), + fh_get_file_name (r->fh), var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"), var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric"))); @@ -1175,11 +1448,11 @@ read_value_labels (struct sfm_reader *r, if (var[0]->type == NUMERIC) msg (MW, _("%s: File contains duplicate label for value %g for " "variable %s."), - fh_get_filename (r->fh), label->value.f, v->name); + fh_get_file_name (r->fh), label->value.f, v->name); else msg (MW, _("%s: File contains duplicate label for value `%.*s' " "for variable %s."), - fh_get_filename (r->fh), v->width, label->value.s, v->name); + fh_get_file_name (r->fh), v->width, label->value.s, v->name); } } @@ -1189,7 +1462,7 @@ read_value_labels (struct sfm_reader *r, free (var); return 1; -error: + error: if (labels) { for (i = 0; i < n_labels; i++) @@ -1220,13 +1493,14 @@ buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc) { if (ferror (r->file)) msg (ME, _("%s: Reading system file: %s."), - fh_get_filename (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (errno)); else corrupt_msg (ME, _("%s: Unexpected end of file."), - fh_get_filename (r->fh)); + fh_get_file_name (r->fh)); r->ok = false; return NULL; } + return buf; } @@ -1239,7 +1513,7 @@ buf_unread(struct sfm_reader *r, size_t byte_cnt) if ( 0 != fseek(r->file, -byte_cnt, SEEK_CUR)) { msg (ME, _("%s: Seeking system file: %s."), - fh_get_filename (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (errno)); } } @@ -1255,13 +1529,13 @@ read_documents (struct sfm_reader *r, struct dictionary *dict) if (dict_get_documents (dict) != NULL) lose ((ME, _("%s: System file contains multiple " "type 6 (document) records."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); assertive_buf_read (r, &line_cnt, sizeof line_cnt, 0); if (line_cnt <= 0) lose ((ME, _("%s: Number of document lines (%ld) " "must be greater than 0."), - fh_get_filename (r->fh), (long) line_cnt)); + fh_get_file_name (r->fh), (long) line_cnt)); documents = buf_read (r, NULL, 80 * line_cnt, line_cnt * 80 + 1); /* FIXME? Run through asciify. */ @@ -1272,7 +1546,7 @@ read_documents (struct sfm_reader *r, struct dictionary *dict) free (documents); return 1; -error: + error: return 0; } @@ -1294,7 +1568,7 @@ buffer_input (struct sfm_reader *r) if (ferror (r->file)) { msg (ME, _("%s: Error reading file: %s."), - fh_get_filename (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (errno)); r->ok = false; return 0; } @@ -1334,14 +1608,14 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) return 0; lose ((ME, _("%s: Compressed data is corrupted. Data ends " "in partial case."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); case 253: /* Code 253 indicates that the value is stored explicitly following the instruction bytes. */ if (r->ptr == NULL || r->ptr >= r->end) if (!buffer_input (r)) lose ((ME, _("%s: Unexpected end of file."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); memcpy (buf++, r->ptr++, sizeof *buf); if (buf >= buf_end) goto success; @@ -1382,7 +1656,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) { if (buf_beg != buf) lose ((ME, _("%s: Unexpected end of file."), - fh_get_filename (r->fh))); + fh_get_file_name (r->fh))); else return 0; } @@ -1393,18 +1667,32 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) abort (); -success: + success: /* We have filled up an entire record. Update state and return successfully. */ r->y = ++p; return 1; -error: + error: /* I/O error. */ r->ok = false; return 0; } + +static int +compare_var_index(const void *_v1, const void *_v2, void *aux UNUSED) +{ + const struct variable *const *v1 = _v1; + const struct variable *const *v2 = _v2; + + if ( (*v1)->index < (*v2)->index) + return -1; + + return ( (*v1)->index > (*v2)->index) ; +} + + /* Reads one case from READER's file into C. Returns nonzero only if successful. */ int @@ -1412,8 +1700,15 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { if (!r->ok) return 0; - - if (!r->compressed && sizeof (flt64) == sizeof (double)) + + if ( ! r->svars ) + { + r->svars = (struct variable **) hsh_data(r->var_hash); + sort(r->svars, hsh_count(r->var_hash), + sizeof(*r->svars), compare_var_index, 0); + } + + if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls) { /* Fast path: external and internal representations are the same, except possibly for endianness or SYSMIS. Read @@ -1428,9 +1723,12 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { int i; - for (i = 0; i < r->value_cnt; i++) - if (r->vars[i].width == 0) - bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f); + for (i = 0; i < hsh_count(r->var_hash); i++) + { + struct variable *v = r->svars[i]; + if (v->width == 0) + bswap_flt64 (&case_data_rw (c, v->fv)->f); + } } /* Fix up SYSMIS values if needed. @@ -1439,10 +1737,12 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) if (r->sysmis != SYSMIS) { int i; - - for (i = 0; i < r->value_cnt; i++) - if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis) - case_data_rw (c, r->vars[i].fv)->f = SYSMIS; + for (i = 0; i < hsh_count(r->var_hash); i++) + { + struct variable *v = r->svars[i]; + if (v->width == 0 && case_num (c, i) == r->sysmis) + case_data_rw (c, v->fv)->f = SYSMIS; + } } } else @@ -1458,6 +1758,8 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) bounce_size = sizeof *bounce * r->value_cnt; bounce = bounce_cur = local_alloc (bounce_size); + memset(bounce, 0, bounce_size); + if (!r->compressed) read_ok = fread_ok (r, bounce, bounce_size); else @@ -1468,21 +1770,31 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) return 0; } - for (i = 0; i < r->value_cnt; i++) + for (i = 0; i < hsh_count(r->var_hash); i++) { - struct sfm_var *v = &r->vars[i]; + struct variable *tv = r->svars[i]; - if (v->width == 0) + if (tv->width == 0) { flt64 f = *bounce_cur++; if (r->reverse_endian) bswap_flt64 (&f); - case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f; + case_data_rw (c, tv->fv)->f = f == r->sysmis ? SYSMIS : f; } - else if (v->width != -1) + else if (tv->width != -1) { - memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width); - bounce_cur += DIV_RND_UP (v->width, sizeof (flt64)); + flt64 *bc_start = bounce_cur; + int ofs = 0; + while (ofs < tv->width ) + { + const int chunk = MIN (MAX_LONG_STRING, tv->width - ofs); + memcpy (case_data_rw (c, tv->fv)->s + ofs, bounce_cur, chunk); + + bounce_cur += DIV_RND_UP (chunk, sizeof (flt64)); + + ofs += chunk; + } + bounce_cur = bc_start + width_to_bytes(tv->width) / sizeof(flt64); } } @@ -1503,13 +1815,13 @@ fread_ok (struct sfm_reader *r, void *buffer, size_t byte_cnt) if (ferror (r->file)) { msg (ME, _("%s: Reading system file: %s."), - fh_get_filename (r->fh), strerror (errno)); + fh_get_file_name (r->fh), strerror (errno)); r->ok = false; } else if (read_bytes != 0) { msg (ME, _("%s: Partial record at end of system file."), - fh_get_filename (r->fh)); + fh_get_file_name (r->fh)); r->ok = false; } return 0;