X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=20713a5667a5048fbf533e64e64d79da83db7b6c;hb=04fb909bd65eee66428d131ff34a6e1fde42e243;hp=bc58e4e45de0fe1b0513b7c323d3b3df094bfd0c;hpb=2766c2d47448010527d52dd304213d0bb563dd00;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index bc58e4e4..20713a56 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -23,14 +23,17 @@ #include #include #include +#include #include +#include #include #include #include #include #include #include +#include #include "sys-file-reader.h" #include "sfm-private.h" @@ -53,16 +56,17 @@ struct sfm_reader FILE *file; /* File stream. */ int reverse_endian; /* 1=file has endianness opposite us. */ - int fix_specials; /* 1=SYSMIS/HIGHEST/LOWEST differs from us. */ int value_cnt; /* Number of `union values's per case. */ long case_cnt; /* Number of cases, -1 if unknown. */ int compressed; /* 1=compressed, 0=not compressed. */ - double bias; /* Compression bias, usually 100.0. */ + double bias; /* Compression bias, usually 100.0. */ int weight_idx; /* 0-based index of weighting variable, or -1. */ bool ok; /* False after an I/O error or corrupt data. */ + bool has_vls; /* True if the file has one or more Very Long Strings*/ /* Variables. */ - struct sfm_var *vars; /* Variables. */ + struct sfm_var *vars; + size_t var_cnt; /* File's special constants. */ flt64 sysmis; @@ -129,16 +133,16 @@ corrupt_msg (int class, const char *format,...) va_list args; struct string text; - ds_create (&text, _("corrupt system file: ")); + ds_init_cstr (&text, _("corrupt system file: ")); va_start (args, format); - ds_vprintf (&text, format, args); + ds_put_vformat (&text, format, args); va_end (args); m.category = msg_class_to_category (class); m.severity = msg_class_to_severity (class); m.where.file_name = NULL; m.where.line_number = 0; - m.text = ds_c_str (&text); + m.text = ds_cstr (&text); msg_emit (&m); } @@ -266,6 +270,7 @@ pair_sn_free(void *p, void *aux UNUSED) } + /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. If INFO is non-null, then it receives additional info about the @@ -283,7 +288,6 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, /* A hash table of long variable names indexed by short name */ struct hsh_table *short_to_long = NULL; - *dict = dict_create (); if (!fh_open (fh, FH_REF_FILE, "system file", "rs")) goto error; @@ -294,13 +298,13 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->file = fn_open (fh_get_file_name (fh), "rb"); r->reverse_endian = 0; - r->fix_specials = 0; r->value_cnt = 0; r->case_cnt = 0; r->compressed = 0; r->bias = 100.0; r->weight_idx = -1; r->ok = true; + r->has_vls = false; r->vars = NULL; @@ -383,10 +387,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { struct { - int32_t subtype P; - int32_t size P; - int32_t count P; - } + int32_t subtype ; + int32_t size ; + int32_t count ; + } ATTRIBUTE((packed)) data; unsigned long bytes; @@ -439,16 +443,26 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, { struct { - int32_t measure P; - int32_t width P; - int32_t align P; - } + int32_t measure ; + int32_t width ; + int32_t align ; + } ATTRIBUTE((packed)) params; struct variable *v; assertive_buf_read (r, ¶ms, sizeof(params), 0); + if ( ! measure_is_valid(params.measure) + || + ! alignment_is_valid(params.align)) + { + msg(MW, + _("%s: Invalid variable display parameters. Default parameters substituted."), + fh_get_file_name(r->fh)); + continue; + } + v = dict_get_var(*dict, i); v->measure = params.measure; @@ -460,7 +474,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, case 13: /* SPSS 12.0 Long variable name map */ { - char *short_name, *save_ptr; + char *short_name; + char *save_ptr = NULL; int idx; /* Read data. */ @@ -538,7 +553,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, records have been processed. --- JMD 27 April 2006 */ - /* For compatability, make sure dictionary + /* For compatibility, make sure dictionary is in long variable name map order. In the common case, this has no effect, because the dictionary and the long @@ -566,6 +581,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } buffer[bytes] = '\0'; + r->has_vls = true; /* Note: SPSS v13 terminates this record with 00, whereas SPSS v14 terminates it with 00 09. We must @@ -612,7 +628,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, goto error; } - + l = length; if ( v->width > EFFECTIVE_LONG_STRING_LENGTH ) l -= EFFECTIVE_LONG_STRING_LENGTH; @@ -632,10 +648,13 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, dict_delete_var(*dict, v_next); } - + + assert ( length > MAX_LONG_STRING ); + v->width = length; v->print.w = v->width; v->write.w = v->width; + v->nv = DIV_RND_UP (length, MAX_SHORT_STRING); } eq_seen = false; memset(name, 0, SHORT_NAME_LEN+1); @@ -654,6 +673,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, } } free(buffer); + dict_compact_values(*dict); } break; @@ -692,6 +712,23 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, success: /* Come here on successful completion. */ + /* Create an index of dictionary variable widths for + sfm_read_case to use. We cannot use the `struct variables' + from the dictionary we created, because the caller owns the + dictionary and may destroy or modify its variables. */ + { + size_t i; + + r->var_cnt = dict_get_var_cnt (*dict); + r->vars = xnmalloc (r->var_cnt, sizeof *r->vars); + for (i = 0; i < r->var_cnt; i++) + { + struct variable *v = dict_get_var (*dict, i); + struct sfm_var *sv = &r->vars[i]; + sv->width = v->width; + sv->fv = v->fv; + } + } free (var_by_idx); hsh_destroy(short_to_long); @@ -969,13 +1006,6 @@ read_variables (struct sfm_reader *r, *var_by_idx = 0; - /* Pre-allocate variables. */ - if (r->value_cnt != -1) - { - *var_by_idx = xnmalloc (r->value_cnt, sizeof **var_by_idx); - r->vars = xnmalloc (r->value_cnt, sizeof *r->vars); - } - /* Read in the entry for each variable and use the info to initialize the dictionary. */ @@ -1007,7 +1037,6 @@ read_variables (struct sfm_reader *r, } *var_by_idx = xnrealloc (*var_by_idx, i + 1, sizeof **var_by_idx); - r->vars = xnrealloc (r->vars, i + 1, sizeof *r->vars); /* If there was a long string previously, make sure that the continuations are present; otherwise make sure there aren't @@ -1020,7 +1049,6 @@ read_variables (struct sfm_reader *r, fh_get_file_name (r->fh), i)); - r->vars[i].width = -1; (*var_by_idx)[i] = NULL; long_string_count--; continue; @@ -1154,10 +1182,6 @@ read_variables (struct sfm_reader *r, if (!parse_format_spec (r, sv.print, &vv->print, vv) || !parse_format_spec (r, sv.write, &vv->write, vv)) goto error; - - r->vars[i].width = vv->width; - r->vars[i].fv = vv->fv; - } /* Some consistency checks. */ @@ -1597,7 +1621,7 @@ read_compressed_data (struct sfm_reader *r, flt64 *buf) p = r->x; } - abort (); + NOT_REACHED (); success: /* We have filled up an entire record. Update state and return @@ -1618,8 +1642,8 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { if (!r->ok) return 0; - - if (!r->compressed && sizeof (flt64) == sizeof (double)) + + if (!r->compressed && sizeof (flt64) == sizeof (double) && ! r->has_vls) { /* Fast path: external and internal representations are the same, except possibly for endianness or SYSMIS. Read @@ -1634,7 +1658,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { int i; - for (i = 0; i < r->value_cnt; i++) + for (i = 0; i < r->var_cnt; i++) if (r->vars[i].width == 0) bswap_flt64 (&case_data_rw (c, r->vars[i].fv)->f); } @@ -1646,7 +1670,7 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) { int i; - for (i = 0; i < r->value_cnt; i++) + for (i = 0; i < r->var_cnt; i++) if (r->vars[i].width == 0 && case_num (c, i) == r->sysmis) case_data_rw (c, r->vars[i].fv)->f = SYSMIS; } @@ -1664,6 +1688,8 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) bounce_size = sizeof *bounce * r->value_cnt; bounce = bounce_cur = local_alloc (bounce_size); + memset(bounce, 0, bounce_size); + if (!r->compressed) read_ok = fread_ok (r, bounce, bounce_size); else @@ -1674,21 +1700,31 @@ sfm_read_case (struct sfm_reader *r, struct ccase *c) return 0; } - for (i = 0; i < r->value_cnt; i++) + for (i = 0; i < r->var_cnt; i++) { - struct sfm_var *v = &r->vars[i]; + struct sfm_var *sv = &r->vars[i]; - if (v->width == 0) + if (sv->width == 0) { flt64 f = *bounce_cur++; if (r->reverse_endian) bswap_flt64 (&f); - case_data_rw (c, v->fv)->f = f == r->sysmis ? SYSMIS : f; + case_data_rw (c, sv->fv)->f = f == r->sysmis ? SYSMIS : f; } - else if (v->width != -1) + else { - memcpy (case_data_rw (c, v->fv)->s, bounce_cur, v->width); - bounce_cur += DIV_RND_UP (v->width, sizeof (flt64)); + flt64 *bc_start = bounce_cur; + int ofs = 0; + while (ofs < sv->width ) + { + const int chunk = MIN (MAX_LONG_STRING, sv->width - ofs); + memcpy (case_data_rw (c, sv->fv)->s + ofs, bounce_cur, chunk); + + bounce_cur += DIV_RND_UP (chunk, sizeof (flt64)); + + ofs += chunk; + } + bounce_cur = bc_start + width_to_bytes(sv->width) / sizeof(flt64); } }