X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-writer.c;h=000534c4844a2dd6bd2237c658add8f66c949109;hb=f89de8c330e8f82f0e7195c4d35588cfcbdd02fc;hp=823d0c47e29138950421e7103b964f8ba52b48a4;hpb=a19b858e0ac3c69e4a28c0ca6d8674427268a863;p=pspp-builds.git diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index 823d0c47..000534c4 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -27,9 +27,7 @@ #include #include #include -#if HAVE_UNISTD_H -#include /* Required by SunOS4. */ -#endif +#include #include #include "case.h" #include "dictionary.h" @@ -44,12 +42,11 @@ #include "value-labels.h" #include "variable.h" #include +#include #include "gettext.h" #define _(msgid) gettext (msgid) -#include - /* Compression bias used by PSPP. Values between (1 - COMPRESSION_BIAS) and (251 - COMPRESSION_BIAS) inclusive can be compressed. */ @@ -65,6 +62,7 @@ struct sfm_writer int compress; /* 1=compressed, 0=not compressed. */ int case_cnt; /* Number of cases written so far. */ size_t flt64_cnt; /* Number of flt64 elements in case. */ + bool has_vls; /* Does the dict have very long strings? */ /* Compression buffering. */ flt64 *buf; /* Buffered data. */ @@ -76,6 +74,8 @@ struct sfm_writer /* Variables. */ struct sfm_var *vars; /* Variables. */ size_t var_cnt; /* Number of variables. */ + size_t var_cnt_vls; /* Number of variables including + very long string components. */ }; /* A variable in a system file. 
*/ @@ -89,7 +89,7 @@ struct sfm_var static char *append_string_max (char *, const char *, const char *); static void write_header (struct sfm_writer *, const struct dictionary *); static void buf_write (struct sfm_writer *, const void *, size_t); -static void write_variable (struct sfm_writer *, struct variable *); +static void write_variable (struct sfm_writer *, const struct variable *); static void write_value_labels (struct sfm_writer *, struct variable *, int idx); static void write_rec_7_34 (struct sfm_writer *); @@ -97,18 +97,29 @@ static void write_rec_7_34 (struct sfm_writer *); static void write_longvar_table (struct sfm_writer *w, const struct dictionary *dict); +static void write_vls_length_table (struct sfm_writer *w, + const struct dictionary *dict); + + static void write_variable_display_parameters (struct sfm_writer *w, const struct dictionary *dict); static void write_documents (struct sfm_writer *, const struct dictionary *); -static int does_dict_need_translation (const struct dictionary *); static inline int var_flt64_cnt (const struct variable *v) +{ + assert(sizeof(flt64) == MAX_SHORT_STRING); + return width_to_bytes(v->width) / MAX_SHORT_STRING ; +} + +static inline int +var_flt64_cnt_nom (const struct variable *v) { return v->type == NUMERIC ? 1 : DIV_RND_UP (v->width, sizeof (flt64)); } + /* Returns default options for writing a system file. */ struct sfm_write_options sfm_writer_default_options (void) @@ -120,6 +131,28 @@ sfm_writer_default_options (void) return opts; } + +/* Return a short variable name to be used as the continuation of the + variable with the short name SN. + + FIXME: Need to resolve clashes somehow. 
+ + */ +static const char * +cont_var_name(const char *sn, int idx) +{ + static char s[SHORT_NAME_LEN + 1]; + + char abb[SHORT_NAME_LEN + 1 - 3]= {0}; + + strncpy(abb, sn, SHORT_NAME_LEN - 3); + + snprintf(s, SHORT_NAME_LEN + 1, "%s%03d", abb, idx); + + return s; +} + + /* Opens the system file designated by file handle FH for writing cases from dictionary D according to the given OPTS. If COMPRESS is nonzero, the system file will be compressed. @@ -149,7 +182,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, mode = S_IRUSR | S_IRGRP | S_IROTH; if (opts.create_writeable) mode |= S_IWUSR | S_IWGRP | S_IWOTH; - fd = open (fh_get_filename (fh), O_WRONLY | O_CREAT | O_TRUNC, mode); + fd = open (fh_get_file_name (fh), O_WRONLY | O_CREAT | O_TRUNC, mode); if (fd < 0) goto open_error; @@ -162,21 +195,27 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, w->fh = fh; w->file = fdopen (fd, "w"); - w->needs_translation = does_dict_need_translation (d); + w->needs_translation = dict_compacting_would_change (d); w->compress = opts.compress; w->case_cnt = 0; w->flt64_cnt = 0; + w->has_vls = false; w->buf = w->end = w->ptr = NULL; w->x = w->y = NULL; w->var_cnt = dict_get_var_cnt (d); + w->var_cnt_vls = w->var_cnt; w->vars = xnmalloc (w->var_cnt, sizeof *w->vars); for (i = 0; i < w->var_cnt; i++) { const struct variable *dv = dict_get_var (d, i); struct sfm_var *sv = &w->vars[i]; sv->width = dv->width; + /* spss compatibility nonsense */ + if ( dv->width > MAX_LONG_STRING ) + w->has_vls = true; + sv->fv = dv->fv; sv->flt64_cnt = var_flt64_cnt (dv); } @@ -194,7 +233,42 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, /* Write basic variable info. 
*/ dict_assign_short_names (d); for (i = 0; i < dict_get_var_cnt (d); i++) - write_variable (w, dict_get_var (d, i)); + { + int count = 0; + const struct variable *v = dict_get_var(d, i); + int wcount = v->width; + + do { + struct variable var_cont = *v; + if ( v->type == ALPHA) + { + if ( 0 != count ) + { + mv_init(&var_cont.miss, 0); + strcpy(var_cont.short_name, + cont_var_name(v->short_name, count)); + var_cont.label = NULL; + w->var_cnt_vls++; + } + count++; + if ( wcount > MAX_LONG_STRING ) + { + var_cont.width = MAX_LONG_STRING; + wcount -= EFFECTIVE_LONG_STRING_LENGTH; + } + else + { + var_cont.width = wcount; + wcount -= var_cont.width; + } + + var_cont.write.w = var_cont.width; + var_cont.print.w = var_cont.width; + } + + write_variable (w, &var_cont); + } while(wcount > 0); + } /* Write out value labels. */ for (idx = i = 0; i < dict_get_var_cnt (d); i++) @@ -215,13 +289,15 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, if (opts.version >= 3) write_longvar_table (w, d); + write_vls_length_table(w, d); + /* Write end-of-headers record. */ { struct { - int32 rec_type P; - int32 filler P; - } + int32_t rec_type ; + int32_t filler ; + } ATTRIBUTE((packed)) rec_999; rec_999.rec_type = 999; @@ -250,27 +326,10 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, open_error: msg (ME, _("Error opening \"%s\" for writing as a system file: %s."), - fh_get_filename (fh), strerror (errno)); + fh_get_file_name (fh), strerror (errno)); goto error; } -static int -does_dict_need_translation (const struct dictionary *d) -{ - size_t case_idx; - size_t i; - - case_idx = 0; - for (i = 0; i < dict_get_var_cnt (d); i++) - { - struct variable *v = dict_get_var (d, i); - if (v->fv != case_idx) - return 0; - case_idx += v->nv; - } - return 1; -} - /* Returns value of X truncated to two least-significant digits. 
*/ static int rerange (int x) @@ -304,8 +363,10 @@ write_header (struct sfm_writer *w, const struct dictionary *d) w->flt64_cnt = 0; for (i = 0; i < dict_get_var_cnt (d); i++) - w->flt64_cnt += var_flt64_cnt (dict_get_var (d, i)); - hdr.case_size = w->flt64_cnt; + { + w->flt64_cnt += var_flt64_cnt (dict_get_var (d, i)); + } + hdr.nominal_case_size = w->flt64_cnt; hdr.compress = w->compress; @@ -374,15 +435,16 @@ write_header (struct sfm_writer *w, const struct dictionary *d) /* Translates format spec from internal form in SRC to system file format in DEST. */ static inline void -write_format_spec (struct fmt_spec *src, int32 *dest) +write_format_spec (const struct fmt_spec *src, int32_t *dest) { + assert(check_output_specifier(src, true)); *dest = (formats[src->type].spss << 16) | (src->w << 8) | src->d; } /* Write the variable record(s) for primary variable P and secondary variable S to system file W. */ static void -write_variable (struct sfm_writer *w, struct variable *v) +write_variable (struct sfm_writer *w, const struct variable *v) { struct sysfile_variable sv; @@ -392,7 +454,7 @@ write_variable (struct sfm_writer *w, struct variable *v) int nm; /* Number of missing values, possibly negative. 
*/ sv.rec_type = 2; - sv.type = v->width; + sv.type = min(v->width, MAX_LONG_STRING); sv.has_var_label = (v->label != NULL); mv_copy (&mv, &v->miss); @@ -427,9 +489,9 @@ write_variable (struct sfm_writer *w, struct variable *v) { struct label { - int32 label_len P; - char label[255] P; - } + int32_t label_len ; + char label[255] ; + } ATTRIBUTE((packed)) l; int ext_len; @@ -457,7 +519,8 @@ write_variable (struct sfm_writer *w, struct variable *v) memset (&sv.write, 0, sizeof sv.write); memset (&sv.name, 0, sizeof sv.name); - pad_count = DIV_RND_UP (v->width, (int) sizeof (flt64)) - 1; + pad_count = DIV_RND_UP (min(v->width, MAX_LONG_STRING), + (int) sizeof (flt64)) - 1; for (i = 0; i < pad_count; i++) buf_write (w, &sv, sizeof sv); } @@ -470,17 +533,17 @@ write_value_labels (struct sfm_writer *w, struct variable *v, int idx) { struct value_label_rec { - int32 rec_type P; - int32 n_labels P; - flt64 labels[1] P; - }; + int32_t rec_type ; + int32_t n_labels ; + flt64 labels[1] ; + } ATTRIBUTE((packed)); struct var_idx_rec { - int32 rec_type P; - int32 n_vars P; - int32 vars[1] P; - }; + int32_t rec_type ; + int32_t n_vars ; + int32_t vars[1] ; + } ATTRIBUTE((packed)); struct val_labs_iterator *i; struct value_label_rec *vlr; @@ -531,11 +594,10 @@ static void write_documents (struct sfm_writer *w, const struct dictionary *d) { struct - { - int32 rec_type P; /* Always 6. */ - int32 n_lines P; /* Number of lines of documents. */ - } - rec_6; + { + int32_t rec_type ; /* Always 6. */ + int32_t n_lines ; /* Number of lines of documents. 
*/ + } ATTRIBUTE((packed)) rec_6; const char *documents; size_t n_lines; @@ -558,16 +620,16 @@ write_variable_display_parameters (struct sfm_writer *w, struct { - int32 rec_type P; - int32 subtype P; - int32 elem_size P; - int32 n_elem P; - } vdp_hdr; + int32_t rec_type ; + int32_t subtype ; + int32_t elem_size ; + int32_t n_elem ; + } ATTRIBUTE((packed)) vdp_hdr; vdp_hdr.rec_type = 7; vdp_hdr.subtype = 11; vdp_hdr.elem_size = 4; - vdp_hdr.n_elem = w->var_cnt * 3; + vdp_hdr.n_elem = w->var_cnt_vls * 3; buf_write (w, &vdp_hdr, sizeof vdp_hdr); @@ -576,11 +638,10 @@ write_variable_display_parameters (struct sfm_writer *w, struct variable *v; struct { - int32 measure P; - int32 width P; - int32 align P; - } - params; + int32_t measure ; + int32_t width ; + int32_t align ; + } ATTRIBUTE((packed)) params; v = dict_get_var(dict, i); @@ -589,7 +650,67 @@ write_variable_display_parameters (struct sfm_writer *w, params.align = v->alignment; buf_write (w, &params, sizeof(params)); + + if ( v->width > MAX_LONG_STRING ) + { + int wcount = v->width - EFFECTIVE_LONG_STRING_LENGTH ; + + while (wcount > 0) + { + params.width = wcount > MAX_LONG_STRING ? 
32 : wcount; + + buf_write (w, &params, sizeof(params)); + + wcount -= EFFECTIVE_LONG_STRING_LENGTH ; + } + } + } +} + +/* Writes the table of lengths for Very Long String Variables */ +static void +write_vls_length_table (struct sfm_writer *w, + const struct dictionary *dict) +{ + int i; + struct + { + int32_t rec_type ; + int32_t subtype ; + int32_t elem_size ; + int32_t n_elem ; + } ATTRIBUTE((packed)) vls_hdr; + + struct string vls_length_map; + + ds_init_empty (&vls_length_map); + + vls_hdr.rec_type = 7; + vls_hdr.subtype = 14; + vls_hdr.elem_size = 1; + + + for (i = 0; i < dict_get_var_cnt (dict); ++i) + { + const struct variable *v = dict_get_var (dict, i); + + if ( v->width <= MAX_LONG_STRING ) + continue; + + ds_put_format (&vls_length_map, "%s=%05d", v->short_name, v->width); + ds_put_char (&vls_length_map, '\0'); + ds_put_char (&vls_length_map, '\t'); } + + vls_hdr.n_elem = ds_length (&vls_length_map); + + if ( vls_hdr.n_elem > 0 ) + { + buf_write (w, &vls_hdr, sizeof vls_hdr); + buf_write (w, ds_data (&vls_length_map), ds_length (&vls_length_map)); + } + + ds_destroy (&vls_length_map); } /* Writes the long variable name table */ @@ -598,24 +719,23 @@ write_longvar_table (struct sfm_writer *w, const struct dictionary *dict) { struct { - int32 rec_type P; - int32 subtype P; - int32 elem_size P; - int32 n_elem P; - } - lv_hdr; + int32_t rec_type ; + int32_t subtype ; + int32_t elem_size ; + int32_t n_elem ; + } ATTRIBUTE((packed)) lv_hdr; struct string long_name_map; size_t i; - ds_init (&long_name_map, 10 * dict_get_var_cnt (dict)); + ds_init_empty (&long_name_map); for (i = 0; i < dict_get_var_cnt (dict); i++) { struct variable *v = dict_get_var (dict, i); if (i) - ds_putc (&long_name_map, '\t'); - ds_printf (&long_name_map, "%s=%s", v->short_name, v->name); + ds_put_char (&long_name_map, '\t'); + ds_put_format (&long_name_map, "%s=%s", v->short_name, v->name); } lv_hdr.rec_type = 7; @@ -635,18 +755,17 @@ write_rec_7_34 (struct sfm_writer *w) { struct { - 
int32 rec_type_3 P; - int32 subtype_3 P; - int32 data_type_3 P; - int32 n_elem_3 P; - int32 elem_3[8] P; - int32 rec_type_4 P; - int32 subtype_4 P; - int32 data_type_4 P; - int32 n_elem_4 P; - flt64 elem_4[3] P; - } - rec_7; + int32_t rec_type_3 ; + int32_t subtype_3 ; + int32_t data_type_3 ; + int32_t n_elem_3 ; + int32_t elem_3[8] ; + int32_t rec_type_4 ; + int32_t subtype_4 ; + int32_t data_type_4 ; + int32_t n_elem_4 ; + flt64 elem_4[3] ; + } ATTRIBUTE((packed)) rec_7; /* Components of the version number, from major to minor. */ int version_component[3]; @@ -666,7 +785,7 @@ write_rec_7_34 (struct sfm_writer *w) rec_7.rec_type_3 = 7; rec_7.subtype_3 = 3; - rec_7.data_type_3 = sizeof (int32); + rec_7.data_type_3 = sizeof (int32_t); rec_7.n_elem_3 = 8; rec_7.elem_3[0] = version_component[0]; rec_7.elem_3[1] = version_component[1]; @@ -749,7 +868,7 @@ sfm_write_case (struct sfm_writer *w, const struct ccase *c) w->case_cnt++; if (!w->needs_translation && !w->compress - && sizeof (flt64) == sizeof (union value)) + && sizeof (flt64) == sizeof (union value) && ! w->has_vls ) { /* Fast path: external and internal representations are the same and the dictionary is properly ordered. Write @@ -762,21 +881,38 @@ sfm_write_case (struct sfm_writer *w, const struct ccase *c) Write into a bounce buffer, then write to W. 
*/ flt64 *bounce; flt64 *bounce_cur; + flt64 *bounce_end; size_t bounce_size; size_t i; bounce_size = sizeof *bounce * w->flt64_cnt; bounce = bounce_cur = local_alloc (bounce_size); + bounce_end = bounce + bounce_size; for (i = 0; i < w->var_cnt; i++) { struct sfm_var *v = &w->vars[i]; + memset(bounce_cur, ' ', v->flt64_cnt * sizeof (flt64)); + if (v->width == 0) - *bounce_cur = case_num (c, v->fv); + { + *bounce_cur = case_num (c, v->fv); + bounce_cur += v->flt64_cnt; + } else - memcpy (bounce_cur, case_data (c, v->fv)->s, v->width); - bounce_cur += v->flt64_cnt; + { int ofs = 0; + while (ofs < v->width) + { + int chunk = MIN (MAX_LONG_STRING, v->width - ofs); + int nv = DIV_RND_UP (chunk, sizeof (flt64)); + buf_copy_rpad ((char *) bounce_cur, nv * sizeof (flt64), + case_data (c, v->fv)->s + ofs, chunk); + bounce_cur += nv; + ofs += chunk; + } + } + } if (!w->compress) @@ -887,7 +1023,7 @@ sfm_close_writer (struct sfm_writer *w) if (ok && !fseek (w->file, offsetof (struct sysfile_header, case_cnt), SEEK_SET)) { - int32 case_cnt = w->case_cnt; + int32_t case_cnt = w->case_cnt; fwrite (&case_cnt, sizeof case_cnt, 1, w->file); clearerr (w->file); } @@ -897,7 +1033,7 @@ sfm_close_writer (struct sfm_writer *w) if (!ok) msg (ME, _("An I/O error occurred writing system file \"%s\"."), - fh_get_filename (w->fh)); + fh_get_file_name (w->fh)); } fh_close (w->fh, "system file", "we");