X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=ccae0577b4d919c050a7fcf702841c97d0570c82;hb=888d0f91d57e0c3c5a4206c30ac71eb87bf44227;hp=0a040af60f5072dc604e2aad585c395e82951600;hpb=ebccf00cbddbcadb5883fa98ddbccbea67642295;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 0a040af6..ccae0577 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -55,7 +56,6 @@ struct sfm_reader FILE *file; /* File stream. */ int reverse_endian; /* 1=file has endianness opposite us. */ - int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */ int value_cnt; /* Number of `union values's per case. */ long case_cnt; /* Number of cases, -1 if unknown. */ int compressed; /* 1=compressed, 0=not compressed. */ @@ -65,8 +65,8 @@ struct sfm_reader bool has_vls; /* True if the file has one or more Very Long Strings*/ /* Variables. */ - struct hsh_table *var_hash; - struct variable *const *svars; + struct sfm_var *vars; + size_t var_cnt; /* File's special constants. */ flt64 sysmis; @@ -86,7 +86,6 @@ struct sfm_reader /* A variable in a system file. */ struct sfm_var { - char name[SHORT_NAME_LEN + 1]; /* name */ int width; /* 0=numeric, otherwise string width. */ int fv; /* Index into case. */ }; @@ -134,16 +133,16 @@ corrupt_msg (int class, const char *format,...) va_list args; struct string text; - ds_create (&text, _("corrupt system file: ")); + ds_init_cstr (&text, _("corrupt system file: ")); va_start (args, format); - ds_vprintf (&text, format, args); + ds_put_vformat (&text, format, args); va_end (args); m.category = msg_class_to_category (class); m.severity = msg_class_to_severity (class); m.where.file_name = NULL; m.where.line_number = 0; - m.text = ds_c_str (&text); + m.text = ds_cstr (&text); msg_emit (&m); } @@ -166,7 +165,7 @@ sfm_close_reader (struct sfm_reader *r) if (r->fh != NULL) fh_close (r->fh, "system file", "rs"); - hsh_destroy(r->var_hash); + free (r->vars); free (r->buf); free (r); } @@ -216,7 +215,7 @@ struct name_pair }; static int -pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED) +pair_sn_compare(const void *_p1, const void *_p2, const void *aux UNUSED) { int i; @@ -247,7 +246,7 @@ pair_sn_compare(const void *_p1, const void *_p2, void *aux UNUSED) } static unsigned int -pair_sn_hash(const void *_p, void *aux UNUSED) +pair_sn_hash(const void *_p, const void *aux UNUSED) { int i; const struct name_pair *p = _p; @@ -265,66 +264,13 @@ pair_sn_hash(const void *_p, void *aux UNUSED) } static void -pair_sn_free(void *p, void *aux UNUSED) +pair_sn_free(void *p, const void *aux UNUSED) { free(p); } -/* A hsh_compare_func that orders variables A and B by their - names. */ -static int -compare_var_shortnames (const void *a_, const void *b_, void *foo UNUSED) -{ - int i; - const struct variable *a = a_; - const struct variable *b = b_; - - char buf1[SHORT_NAME_LEN + 1]; - char buf2[SHORT_NAME_LEN + 1]; - - memset(buf1, 0, SHORT_NAME_LEN + 1); - memset(buf2, 0, SHORT_NAME_LEN + 1); - - for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) - { - buf1[i] = a->short_name[i]; - if ( '\0' == buf1[i]) - break; - } - - for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) - { - buf2[i] = b->short_name[i]; - if ( '\0' == buf2[i]) - break; - } - - return strncmp(buf1, buf2, SHORT_NAME_LEN); -} - -/* A hsh_hash_func that hashes variable V based on its name. */ -static unsigned -hash_var_shortname (const void *v_, void *foo UNUSED) -{ - int i; - const struct variable *v = v_; - char buf[SHORT_NAME_LEN + 1]; - - memset(buf, 0, SHORT_NAME_LEN + 1); - for (i = 0 ; i <= SHORT_NAME_LEN ; ++i ) - { - buf[i] = v->short_name[i]; - if ( '\0' == buf[i]) - break; - } - - return hsh_hash_bytes(buf, strlen(buf)); -} - - - /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. If INFO is non-null, then it receives additional info about the @@ -342,7 +288,6 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, /* A hash table of long variable names indexed by short name */ struct hsh_table *short_to_long = NULL; - *dict = dict_create (); if (!fh_open (fh, FH_REF_FILE, "system file", "rs")) goto error; @@ -353,7 +298,6 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->file = fn_open (fh_get_file_name (fh), "rb"); r->reverse_endian = 0; - r->fix_specials = 0; r->value_cnt = 0; r->case_cnt = 0; r->compressed = 0; @@ -361,9 +305,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->weight_idx = -1; r->ok = true; r->has_vls = false; - r->svars = 0; - r->var_hash = hsh_create(4, compare_var_shortnames, hash_var_shortname, 0, 0); + r->vars = NULL; r->sysmis = -FLT64_MAX; r->highest = FLT64_MAX; @@ -403,7 +346,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, lose ((ME, _("%s: Weighting variable may not be a continuation of " "a long string variable."), fh_get_file_name (fh))); - else if (weight_var->type == ALPHA) + else if (var_is_alpha (weight_var)) lose ((ME, _("%s: Weighting variable may not be a string variable."), fh_get_file_name (fh))); @@ -444,10 +387,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { struct { - int32_t subtype P; - int32_t size P; - int32_t count P; - } + int32_t subtype ; + int32_t size ; + int32_t count ; + } ATTRIBUTE((packed)) data; unsigned long bytes; @@ -496,36 +439,45 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, break; } - for ( i = 0 ; i < min(n_vars, dict_get_var_cnt(*dict)) ; ++i ) + for ( i = 0 ; i < MIN(n_vars, dict_get_var_cnt(*dict)) ; ++i ) { struct { - int32_t measure P; - int32_t width P; - int32_t align P; - } + int32_t measure ; + int32_t width ; + int32_t align ; + } ATTRIBUTE((packed)) params; struct variable *v; assertive_buf_read (r, ¶ms, sizeof(params), 0); + if ( ! measure_is_valid(params.measure) + || + ! alignment_is_valid(params.align)) + { + msg(MW, + _("%s: Invalid variable display parameters. Default parameters substituted."), + fh_get_file_name(r->fh)); + continue; + } + v = dict_get_var(*dict, i); - v->measure = params.measure; - v->display_width = params.width; - v->alignment = params.align; + var_set_measure (v, params.measure); + var_set_display_width (v, params.width); + var_set_alignment (v, params.align); } } break; case 13: /* SPSS 12.0 Long variable name map */ { - char *short_name, *save_ptr; + char *short_name; + char *save_ptr = NULL; int idx; - r->has_vls = true; - /* Read data. */ subrec14data = xmalloc (bytes + 1); if (!buf_read (r, subrec14data, bytes, 0)) @@ -601,7 +553,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, records have been processed. --- JMD 27 April 2006 */ - /* For compatability, make sure dictionary + /* For compatibility, make sure dictionary is in long variable name map order. In the common case, this has no effect, because the dictionary and the long @@ -629,6 +581,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } buffer[bytes] = '\0'; + r->has_vls = true; /* Note: SPSS v13 terminates this record with 00, whereas SPSS v14 terminates it with 00 09. We must @@ -677,10 +630,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } l = length; - if ( v->width > EFFECTIVE_LONG_STRING_LENGTH ) + if ( var_get_width (v) > EFFECTIVE_LONG_STRING_LENGTH ) l -= EFFECTIVE_LONG_STRING_LENGTH; else - l -= v->width; + l -= var_get_width (v); idx = v->index; while ( l > 0 ) @@ -688,22 +641,17 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, struct variable *v_next; v_next = dict_get_var(*dict, idx + 1); - if ( v_next->width > EFFECTIVE_LONG_STRING_LENGTH ) + if ( var_get_width (v_next) > EFFECTIVE_LONG_STRING_LENGTH ) l -= EFFECTIVE_LONG_STRING_LENGTH; else - l -= v_next->width; - - hsh_delete(r->var_hash, v_next); + l -= var_get_width (v_next); dict_delete_var(*dict, v_next); } assert ( length > MAX_LONG_STRING ); - v->width = length; - v->print.w = v->width; - v->write.w = v->width; - v->nv = DIV_RND_UP (length, MAX_SHORT_STRING); + var_set_width (v, length); } eq_seen = false; memset(name, 0, SHORT_NAME_LEN+1); @@ -761,6 +709,24 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, success: /* Come here on successful completion. */ + /* Create an index of dictionary variable widths for + sfm_read_case to use. We cannot use the `struct variables' + from the dictionary we created, because the caller owns the + dictionary and may destroy or modify its variables. */ + { + size_t i; + + r->var_cnt = dict_get_var_cnt (*dict); + r->vars = xnmalloc (r->var_cnt, sizeof *r->vars); + for (i = 0; i < r->var_cnt; i++) + { + struct variable *v = dict_get_var (*dict, i); + struct sfm_var *sv = &r->vars[i]; + sv->width = var_get_width (v); + sv->fv = v->fv; + } + } + free (var_by_idx); hsh_destroy(short_to_long); free (subrec14data); @@ -1046,6 +1012,8 @@ read_variables (struct sfm_reader *r, char name[SHORT_NAME_LEN + 1]; int nv; int j; + struct fmt_spec print, write; + assertive_buf_read (r, &sv, sizeof sv, 0); @@ -1132,7 +1100,7 @@ read_variables (struct sfm_reader *r, fh_get_file_name (r->fh), name)); /* Set the short name the same as the long name */ - var_set_short_name (vv, vv->name); + var_set_short_name (vv, var_get_name (vv)); /* Case reading data. */ nv = sv.type == 0 ? 1 : DIV_RND_UP (sv.type, sizeof (flt64)); @@ -1154,15 +1122,16 @@ read_variables (struct sfm_reader *r, if (len < 0 || len > 255) lose ((ME, _("%s: Variable %s indicates variable label of invalid " "length %d."), - fh_get_file_name (r->fh), vv->name, len)); + fh_get_file_name (r->fh), var_get_name (vv), len)); if ( len != 0 ) { /* Read label into variable structure. */ - vv->label = buf_read (r, NULL, ROUND_UP (len, sizeof (int32_t)), len + 1); - if (vv->label == NULL) - goto error; - vv->label[len] = '\0'; + char label[256]; + assertive_buf_read (r, label, ROUND_UP (len, sizeof (int32_t)), + 0); + label[len] = '\0'; + var_set_label (vv, label); } } @@ -1171,51 +1140,54 @@ read_variables (struct sfm_reader *r, { flt64 mv[3]; int mv_cnt = abs (sv.n_missing_values); + struct missing_values miss; - if (vv->width > MAX_SHORT_STRING) + if (var_get_width (vv) > MAX_SHORT_STRING) lose ((ME, _("%s: Long string variable %s may not have missing " "values."), - fh_get_file_name (r->fh), vv->name)); + fh_get_file_name (r->fh), var_get_name (vv))); + mv_init (&miss, var_get_width (vv)); assertive_buf_read (r, mv, sizeof *mv * mv_cnt, 0); - if (r->reverse_endian && vv->type == NUMERIC) + if (r->reverse_endian && var_is_numeric (vv)) for (j = 0; j < mv_cnt; j++) bswap_flt64 (&mv[j]); if (sv.n_missing_values > 0) { for (j = 0; j < sv.n_missing_values; j++) - if (vv->type == NUMERIC) - mv_add_num (&vv->miss, mv[j]); + if (var_is_numeric (vv)) + mv_add_num (&miss, mv[j]); else - mv_add_str (&vv->miss, (char *) &mv[j]); + mv_add_str (&miss, (char *) &mv[j]); } else { - if (vv->type == ALPHA) + if (var_is_alpha (vv)) lose ((ME, _("%s: String variable %s may not have missing " "values specified as a range."), - fh_get_file_name (r->fh), vv->name)); + fh_get_file_name (r->fh), var_get_name (vv))); if (mv[0] == r->lowest) - mv_add_num_range (&vv->miss, LOWEST, mv[1]); + mv_add_num_range (&miss, LOWEST, mv[1]); else if (mv[1] == r->highest) - mv_add_num_range (&vv->miss, mv[0], HIGHEST); + mv_add_num_range (&miss, mv[0], HIGHEST); else - mv_add_num_range (&vv->miss, mv[0], mv[1]); + mv_add_num_range (&miss, mv[0], mv[1]); if (sv.n_missing_values == -3) - mv_add_num (&vv->miss, mv[2]); + mv_add_num (&miss, mv[2]); } + var_set_missing_values (vv, &miss); } - if (!parse_format_spec (r, sv.print, &vv->print, vv) - || !parse_format_spec (r, sv.write, &vv->write, vv)) + if (!parse_format_spec (r, sv.print, &print, vv) + || !parse_format_spec (r, sv.write, &write, vv)) goto error; - if ( vv->width != -1) - hsh_insert(r->var_hash, vv); + var_set_print_format (vv, &print); + var_set_write_format (vv, &write); } /* Some consistency checks. */ @@ -1242,28 +1214,35 @@ static int parse_format_spec (struct sfm_reader *r, int32_t s, struct fmt_spec *f, const struct variable *v) { - f->type = translate_fmt ((s >> 16) & 0xff); - if (f->type == -1) + bool ok; + + if (!fmt_from_io ((s >> 16) & 0xff, &f->type)) lose ((ME, _("%s: Bad format specifier byte (%d)."), fh_get_file_name (r->fh), (s >> 16) & 0xff)); f->w = (s >> 8) & 0xff; f->d = s & 0xff; - if ((v->type == ALPHA) ^ ((formats[f->type].cat & FCAT_STRING) != 0)) + if (var_is_alpha (v) != fmt_is_string (f->type)) lose ((ME, _("%s: %s variable %s has %s format specifier %s."), fh_get_file_name (r->fh), - v->type == ALPHA ? _("String") : _("Numeric"), - v->name, - formats[f->type].cat & FCAT_STRING ? _("string") : _("numeric"), - formats[f->type].name)); - - if (!check_output_specifier (f, false) - || !check_specifier_width (f, v->width, false)) + var_is_alpha (v) ? _("String") : _("Numeric"), + var_get_name (v), + fmt_is_string (f->type) ? _("string") : _("numeric"), + fmt_name (f->type))); + + msg_disable (); + ok = fmt_check_output (f) && fmt_check_width_compat (f, var_get_width (v)); + msg_enable (); + + if (!ok) { + char fmt_string[FMT_STRING_LEN_MAX + 1]; msg (ME, _("%s variable %s has invalid format specifier %s."), - v->type == NUMERIC ? _("Numeric") : _("String"), - v->name, fmt_to_string (f)); - *f = v->type == NUMERIC ? f8_2 : make_output_format (FMT_A, v->width, 0); + var_is_numeric (v) ? _("Numeric") : _("String"), + var_get_name (v), fmt_to_string (f, fmt_string)); + *f = (var_is_numeric (v) + ? fmt_for_output (FMT_F, 8, 2) + : fmt_for_output (FMT_A, var_get_width (v), 0)); } return 1; @@ -1383,10 +1362,10 @@ read_value_labels (struct sfm_reader *r, "refers to a continuation of a string variable, not to " "an actual variable."), fh_get_file_name (r->fh), var_idx)); - if (v->type == ALPHA && v->width > MAX_SHORT_STRING) + if (var_is_long_string (v)) lose ((ME, _("%s: Value labels are not allowed on long string " "variables (%s)."), - fh_get_file_name (r->fh), v->name)); + fh_get_file_name (r->fh), var_get_name (v))); /* Add it to the list of variables. */ var[i] = v; @@ -1394,22 +1373,24 @@ read_value_labels (struct sfm_reader *r, /* Type check the variables. */ for (i = 1; i < n_vars; i++) - if (var[i]->type != var[0]->type) + if (var_get_type (var[i]) != var_get_type (var[0])) lose ((ME, _("%s: Variables associated with value label are not all of " "identical type. Variable %s has %s type, but variable " "%s has %s type."), fh_get_file_name (r->fh), - var[0]->name, var[0]->type == ALPHA ? _("string") : _("numeric"), - var[i]->name, var[i]->type == ALPHA ? _("string") : _("numeric"))); + var_get_name (var[0]), + var_is_alpha (var[0]) ? _("string") : _("numeric"), + var_get_name (var[i]), + var_is_alpha (var[i]) ? _("string") : _("numeric"))); /* Fill in labels[].value, now that we know the desired type. */ for (i = 0; i < n_labels; i++) { struct label *label = labels + i; - if (var[0]->type == ALPHA) + if (var_is_alpha (var[0])) { - const int copy_len = min (sizeof label->raw_value, + const int copy_len = MIN (sizeof label->raw_value, sizeof label->label); memcpy (label->value.s, label->raw_value, copy_len); } else { @@ -1435,14 +1416,15 @@ read_value_labels (struct sfm_reader *r, if (!val_labs_replace (v->val_labs, label->value, label->label)) continue; - if (var[0]->type == NUMERIC) + if (var_is_numeric (var[0])) msg (MW, _("%s: File contains duplicate label for value %g for " "variable %s."), - fh_get_file_name (r->fh), label->value.f, v->name); + fh_get_file_name (r->fh), label->value.f, var_get_name (v)); else msg (MW, _("%s: File contains duplicate label for value `%.*s' " "for variable %s."), - fh_get_file_name (r->fh), v->width, label->value.s, v->name); + fh_get_file_name (r->fh), var_get_width (v), + label->value.s, var_get_name (v)); } } @@ -1473,7 +1455,7 @@ buf_read (struct sfm_reader *r, void *buf, size_t byte_cnt, size_t min_alloc) assert (r); if (buf == NULL && byte_cnt > 0 ) - buf = xmalloc (max (byte_cnt, min_alloc)); + buf = xmalloc (MAX (byte_cnt, min_alloc)); if ( byte_cnt == 0 ) return buf; @@ -1655,7 +1637,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) p = r->x; } - abort (); + NOT_REACHED (); success: /* We have filled up an entire record. Update state and return @@ -1669,20 +1651,6 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) return 0; } - -static int -compare_var_index(const void *_v1, const void *_v2, void *aux UNUSED) -{ - const struct variable *const *v1 = _v1; - const struct variable *const *v2 = _v2; - - if ( (*v1)->index < (*v2)->index) - return -1; - - return ( (*v1)->index > (*v2)->index) ; -} - - /* Reads one case from READER's file into C. Returns nonzero only if successful. */ int @@ -1691,13 +1659,6 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) if (!r->ok) return 0; - if ( ! r->svars ) - { - r->svars = (struct variable *const *) hsh_data(r->var_hash); - sort(r->svars, hsh_count(r->var_hash), - sizeof(*r->svars), compare_var_index, 0); - } - if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls) { /* Fast path: external and internal representations are the @@ -1713,12 +1674,9 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { int i; - for (i = 0; i < hsh_count(r->var_hash); i++) - { - struct variable *v = r->svars[i]; - if (v->width == 0) - bswap_flt64 (&case_data_rw (c, v->fv)->f); - } + for (i = 0; i < r->var_cnt; i++) + if (r->vars[i].width == 0) + bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f); } /* Fix up SYSMIS values if needed. @@ -1727,12 +1685,10 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) if (r->sysmis != SYSMIS) { int i; - for (i = 0; i < hsh_count(r->var_hash); i++) - { - struct variable *v = r->svars[i]; - if (v->width == 0 && case_num (c, i) == r->sysmis) - case_data_rw (c, v->fv)->f = SYSMIS; - } + + for (i = 0; i < r->var_cnt; i++) + if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis) + case_data_rw (c, r->vars[i].fv)->f = SYSMIS; } } else @@ -1760,31 +1716,31 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) return 0; } - for (i = 0; i < hsh_count(r->var_hash); i++) + for (i = 0; i < r->var_cnt; i++) { - struct variable *tv = r->svars[i]; + struct sfm_var *sv = &r->vars[i]; - if (tv->width == 0) + if (sv->width == 0) { flt64 f = *bounce_cur++; if (r->reverse_endian) bswap_flt64 (&f); - case_data_rw (c, tv->fv)->f = f == r->sysmis ? SYSMIS : f; + case_data_rw (c, sv->fv)->f = f == r->sysmis ? SYSMIS : f; } - else if (tv->width != -1) + else { flt64 *bc_start = bounce_cur; int ofs = 0; - while (ofs < tv->width ) + while (ofs < sv->width ) { - const int chunk = MIN (MAX_LONG_STRING, tv->width - ofs); - memcpy (case_data_rw (c, tv->fv)->s + ofs, bounce_cur, chunk); + const int chunk = MIN (MAX_LONG_STRING, sv->width - ofs); + memcpy (case_data_rw (c, sv->fv)->s + ofs, bounce_cur, chunk); bounce_cur += DIV_RND_UP (chunk, sizeof (flt64)); ofs += chunk; } - bounce_cur = bc_start + width_to_bytes(tv->width) / sizeof(flt64); + bounce_cur = bc_start + width_to_bytes(sv->width) / sizeof(flt64); } }