X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Fdata%2Fsys-file-writer.c;h=ddfb4ae75c47288ec4d4ff2683c89ed76c2091fd;hb=338fb2a2e84df6427a2fdee6769421f57d5666d8;hp=11ca1ef9fe032f398054dd3348ec96c6bb333f13;hpb=c489ad9041918ca8c80dadceade988daab1d25f8;p=pspp diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index 11ca1ef9fe..ddfb4ae75c 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -18,30 +18,38 @@ 02110-1301, USA. */ #include + #include "sys-file-writer.h" #include "sfm-private.h" -#include -#include +#include "sys-file-private.h" + #include #include #include +#include #include #include #include + #include -#include "case.h" -#include "dictionary.h" -#include -#include "file-handle-def.h" #include #include +#include #include -#include "settings.h" -#include "stat-macros.h" #include +#include + +#include "case.h" +#include "dictionary.h" +#include "file-handle-def.h" +#include "format.h" +#include "missing-values.h" +#include "settings.h" #include "value-labels.h" #include "variable.h" -#include + +#include "stat-macros.h" +#include "minmax.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -61,6 +69,7 @@ struct sfm_writer int compress; /* 1=compressed, 0=not compressed. */ int case_cnt; /* Number of cases written so far. */ size_t flt64_cnt; /* Number of flt64 elements in case. */ + bool has_vls; /* Does the dict have very long strings? */ /* Compression buffering. */ flt64 *buf; /* Buffered data. */ @@ -108,13 +117,14 @@ static inline int var_flt64_cnt (const struct variable *v) { assert(sizeof(flt64) == MAX_SHORT_STRING); - return width_to_bytes(v->width) / MAX_SHORT_STRING ; + return sfm_width_to_bytes(var_get_width (v)) / MAX_SHORT_STRING ; } static inline int var_flt64_cnt_nom (const struct variable *v) { - return v->type == NUMERIC ? 1 : DIV_RND_UP (v->width, sizeof (flt64)); + return (var_is_numeric (v) + ? 1 : DIV_RND_UP (var_get_width (v), sizeof (flt64))); } @@ -197,6 +207,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, w->compress = opts.compress; w->case_cnt = 0; w->flt64_cnt = 0; + w->has_vls = false; w->buf = w->end = w->ptr = NULL; w->x = w->y = NULL; @@ -208,12 +219,12 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, { const struct variable *dv = dict_get_var (d, i); struct sfm_var *sv = &w->vars[i]; - sv->width = dv->width; + sv->width = var_get_width (dv); /* spss compatibility nonsense */ - if ( dv->width > MAX_LONG_STRING ) - sv->width = (dv->width / MAX_LONG_STRING) * (MAX_LONG_STRING + 1) - + (dv->width % MAX_LONG_STRING) ; - sv->fv = dv->fv; + if ( var_get_width (dv) >= MIN_VERY_LONG_STRING ) + w->has_vls = true; + + sv->fv = var_get_case_index (dv); sv->flt64_cnt = var_flt64_cnt (dv); } @@ -233,37 +244,37 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, { int count = 0; const struct variable *v = dict_get_var(d, i); - int wcount = v->width; + int wcount = var_get_width (v); do { - struct variable var_cont = *v; - if ( v->type == ALPHA) + struct variable *var_cont = var_clone (v); + var_set_short_name (var_cont, var_get_short_name (v)); + if ( var_is_alpha (v)) { if ( 0 != count ) { - mv_init(&var_cont.miss, 0); - strcpy(var_cont.short_name, - cont_var_name(v->short_name, count)); - var_cont.label = NULL; + var_clear_missing_values (var_cont); + var_set_short_name (var_cont, + cont_var_name (var_get_short_name (v), + count)); + var_clear_label (var_cont); w->var_cnt_vls++; } count++; - if ( wcount > MAX_LONG_STRING ) + if ( wcount >= MIN_VERY_LONG_STRING ) { - var_cont.width = MAX_LONG_STRING; + var_set_width (var_cont, MIN_VERY_LONG_STRING - 1); wcount -= EFFECTIVE_LONG_STRING_LENGTH; } else { - var_cont.width = wcount; - wcount -= var_cont.width; + var_set_width (var_cont, wcount); + wcount -= var_get_width (var_cont); } - - var_cont.write.w = var_cont.width; - var_cont.print.w = var_cont.width; } - write_variable (w, &var_cont); + write_variable (w, var_cont); + var_destroy (var_cont); } while(wcount > 0); } @@ -292,9 +303,9 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, { struct { - int32_t rec_type P; - int32_t filler P; - } + int32_t rec_type ; + int32_t filler ; + } ATTRIBUTE((packed)) rec_999; rec_999.rec_type = 999; @@ -369,7 +380,7 @@ write_header (struct sfm_writer *w, const struct dictionary *d) if (dict_get_weight (d) != NULL) { - struct variable *weight_var; + const struct variable *weight_var; int recalc_weight_idx = 1; int i; @@ -434,8 +445,8 @@ write_header (struct sfm_writer *w, const struct dictionary *d) static inline void write_format_spec (const struct fmt_spec *src, int32_t *dest) { - assert(check_output_specifier(src, true)); - *dest = (formats[src->type].spss << 16) | (src->w << 8) | src->d; + assert (fmt_check_output (src)); + *dest = (fmt_to_io (src->type) << 16) | (src->w << 8) | src->d; } /* Write the variable record(s) for primary variable P and secondary @@ -449,12 +460,13 @@ write_variable (struct sfm_writer *w, const struct variable *v) struct missing_values mv; flt64 m[3]; /* Missing value values. */ int nm; /* Number of missing values, possibly negative. */ + const char *label = var_get_label (v); sv.rec_type = 2; - sv.type = min(v->width, MAX_LONG_STRING); - sv.has_var_label = (v->label != NULL); + sv.type = MIN (var_get_width (v), MIN_VERY_LONG_STRING - 1); + sv.has_var_label = label != NULL; - mv_copy (&mv, &v->miss); + mv_copy (&mv, var_get_missing_values (v)); nm = 0; if (mv_has_range (&mv)) { @@ -467,35 +479,36 @@ write_variable (struct sfm_writer *w, const struct variable *v) { union value value; mv_pop_value (&mv, &value); - if (v->type == NUMERIC) + if (var_is_numeric (v)) m[nm] = value.f; else - buf_copy_rpad ((char *) &m[nm], sizeof m[nm], value.s, v->width); + buf_copy_rpad ((char *) &m[nm], sizeof m[nm], value.s, + var_get_width (v)); nm++; } - if (mv_has_range (&v->miss)) + if (mv_has_range (var_get_missing_values (v))) nm = -nm; sv.n_missing_values = nm; - write_format_spec (&v->print, &sv.print); - write_format_spec (&v->write, &sv.write); - buf_copy_str_rpad (sv.name, sizeof sv.name, v->short_name); + write_format_spec (var_get_print_format (v), &sv.print); + write_format_spec (var_get_write_format (v), &sv.write); + buf_copy_str_rpad (sv.name, sizeof sv.name, var_get_short_name (v)); buf_write (w, &sv, sizeof sv); - if (v->label) + if (label != NULL) { struct label { - int32_t label_len P; - char label[255] P; - } + int32_t label_len ; + char label[255] ; + } ATTRIBUTE((packed)) l; int ext_len; - l.label_len = min (strlen (v->label), 255); + l.label_len = MIN (strlen (label), 255); ext_len = ROUND_UP (l.label_len, sizeof l.label_len); - memcpy (l.label, v->label, l.label_len); + memcpy (l.label, label, l.label_len); memset (&l.label[l.label_len], ' ', ext_len - l.label_len); buf_write (w, &l, offsetof (struct label, label) + ext_len); @@ -504,7 +517,7 @@ write_variable (struct sfm_writer *w, const struct variable *v) if (nm) buf_write (w, m, sizeof *m * abs (nm)); - if (v->type == ALPHA && v->width > (int) sizeof (flt64)) + if (var_is_alpha (v) && var_get_width (v) > (int) sizeof (flt64)) { int i; int pad_count; @@ -516,7 +529,7 @@ write_variable (struct sfm_writer *w, const struct variable *v) memset (&sv.write, 0, sizeof sv.write); memset (&sv.name, 0, sizeof sv.name); - pad_count = DIV_RND_UP (min(v->width, MAX_LONG_STRING), + pad_count = DIV_RND_UP (MIN(var_get_width (v), MIN_VERY_LONG_STRING - 1), (int) sizeof (flt64)) - 1; for (i = 0; i < pad_count; i++) buf_write (w, &sv, sizeof sv); @@ -530,18 +543,19 @@ write_value_labels (struct sfm_writer *w, struct variable *v, int idx) { struct value_label_rec { - int32_t rec_type P; - int32_t n_labels P; - flt64 labels[1] P; - }; + int32_t rec_type ; + int32_t n_labels ; + flt64 labels[1] ; + } ATTRIBUTE((packed)); struct var_idx_rec { - int32_t rec_type P; - int32_t n_vars P; - int32_t vars[1] P; - }; + int32_t rec_type ; + int32_t n_vars ; + int32_t vars[1] ; + } ATTRIBUTE((packed)); + const struct val_labs *val_labs; struct val_labs_iterator *i; struct value_label_rec *vlr; struct var_idx_rec vir; @@ -549,23 +563,24 @@ write_value_labels (struct sfm_writer *w, struct variable *v, int idx) size_t vlr_size; flt64 *loc; - if (!val_labs_count (v->val_labs)) + val_labs = var_get_value_labels (v); + if (val_labs == NULL) return; /* Pass 1: Count bytes. */ vlr_size = (sizeof (struct value_label_rec) - + sizeof (flt64) * (val_labs_count (v->val_labs) - 1)); - for (vl = val_labs_first (v->val_labs, &i); vl != NULL; - vl = val_labs_next (v->val_labs, &i)) + + sizeof (flt64) * (val_labs_count (val_labs) - 1)); + for (vl = val_labs_first (val_labs, &i); vl != NULL; + vl = val_labs_next (val_labs, &i)) vlr_size += ROUND_UP (strlen (vl->label) + 1, sizeof (flt64)); /* Pass 2: Copy bytes. */ vlr = xmalloc (vlr_size); vlr->rec_type = 3; - vlr->n_labels = val_labs_count (v->val_labs); + vlr->n_labels = val_labs_count (val_labs); loc = vlr->labels; - for (vl = val_labs_first_sorted (v->val_labs, &i); vl != NULL; - vl = val_labs_next (v->val_labs, &i)) + for (vl = val_labs_first_sorted (val_labs, &i); vl != NULL; + vl = val_labs_next (val_labs, &i)) { size_t len = strlen (vl->label); @@ -591,11 +606,10 @@ static void write_documents (struct sfm_writer *w, const struct dictionary *d) { struct - { - int32_t rec_type P; /* Always 6. */ - int32_t n_lines P; /* Number of lines of documents. */ - } - rec_6; + { + int32_t rec_type ; /* Always 6. */ + int32_t n_lines ; /* Number of lines of documents. */ + } ATTRIBUTE((packed)) rec_6; const char *documents; size_t n_lines; @@ -618,11 +632,11 @@ write_variable_display_parameters (struct sfm_writer *w, struct { - int32_t rec_type P; - int32_t subtype P; - int32_t elem_size P; - int32_t n_elem P; - } vdp_hdr; + int32_t rec_type ; + int32_t subtype ; + int32_t elem_size ; + int32_t n_elem ; + } ATTRIBUTE((packed)) vdp_hdr; vdp_hdr.rec_type = 7; vdp_hdr.subtype = 11; @@ -636,27 +650,26 @@ write_variable_display_parameters (struct sfm_writer *w, struct variable *v; struct { - int32_t measure P; - int32_t width P; - int32_t align P; - } - params; + int32_t measure ; + int32_t width ; + int32_t align ; + } ATTRIBUTE((packed)) params; v = dict_get_var(dict, i); - params.measure = v->measure; - params.width = v->display_width; - params.align = v->alignment; + params.measure = var_get_measure (v); + params.width = var_get_display_width (v); + params.align = var_get_alignment (v); buf_write (w, ¶ms, sizeof(params)); - if ( v->width > MAX_LONG_STRING ) + if (var_is_long_string (v)) { - int wcount = v->width - EFFECTIVE_LONG_STRING_LENGTH ; + int wcount = var_get_width (v) - EFFECTIVE_LONG_STRING_LENGTH ; while (wcount > 0) { - params.width = wcount > MAX_LONG_STRING ? 32 : wcount; + params.width = wcount >= MIN_VERY_LONG_STRING ? 32 : wcount; buf_write (w, ¶ms, sizeof(params)); @@ -673,17 +686,16 @@ write_vls_length_table (struct sfm_writer *w, { int i; struct - { - int32_t rec_type P; - int32_t subtype P; - int32_t elem_size P; - int32_t n_elem P; - } - vls_hdr; + { + int32_t rec_type ; + int32_t subtype ; + int32_t elem_size ; + int32_t n_elem ; + } ATTRIBUTE((packed)) vls_hdr; struct string vls_length_map; - ds_init (&vls_length_map, 12 * dict_get_var_cnt (dict)); + ds_init_empty (&vls_length_map); vls_hdr.rec_type = 7; vls_hdr.subtype = 14; @@ -694,12 +706,13 @@ write_vls_length_table (struct sfm_writer *w, { const struct variable *v = dict_get_var (dict, i); - if ( v->width <= MAX_LONG_STRING ) + if ( var_get_width (v) < MIN_VERY_LONG_STRING ) continue; - ds_printf (&vls_length_map, "%s=%05d", v->short_name, v->width); - ds_putc (&vls_length_map, '\0'); - ds_putc (&vls_length_map, '\t'); + ds_put_format (&vls_length_map, "%s=%05d", + var_get_short_name (v), var_get_width (v)); + ds_put_char (&vls_length_map, '\0'); + ds_put_char (&vls_length_map, '\t'); } vls_hdr.n_elem = ds_length (&vls_length_map); @@ -719,24 +732,24 @@ write_longvar_table (struct sfm_writer *w, const struct dictionary *dict) { struct { - int32_t rec_type P; - int32_t subtype P; - int32_t elem_size P; - int32_t n_elem P; - } - lv_hdr; + int32_t rec_type ; + int32_t subtype ; + int32_t elem_size ; + int32_t n_elem ; + } ATTRIBUTE((packed)) lv_hdr; struct string long_name_map; size_t i; - ds_init (&long_name_map, 10 * dict_get_var_cnt (dict)); + ds_init_empty (&long_name_map); for (i = 0; i < dict_get_var_cnt (dict); i++) { struct variable *v = dict_get_var (dict, i); if (i) - ds_putc (&long_name_map, '\t'); - ds_printf (&long_name_map, "%s=%s", v->short_name, v->name); + ds_put_char (&long_name_map, '\t'); + ds_put_format (&long_name_map, "%s=%s", + var_get_short_name (v), var_get_name (v)); } lv_hdr.rec_type = 7; @@ -756,18 +769,17 @@ write_rec_7_34 (struct sfm_writer *w) { struct { - int32_t rec_type_3 P; - int32_t subtype_3 P; - int32_t data_type_3 P; - int32_t n_elem_3 P; - int32_t elem_3[8] P; - int32_t rec_type_4 P; - int32_t subtype_4 P; - int32_t data_type_4 P; - int32_t n_elem_4 P; - flt64 elem_4[3] P; - } - rec_7; + int32_t rec_type_3 ; + int32_t subtype_3 ; + int32_t data_type_3 ; + int32_t n_elem_3 ; + int32_t elem_3[8] ; + int32_t rec_type_4 ; + int32_t subtype_4 ; + int32_t data_type_4 ; + int32_t n_elem_4 ; + flt64 elem_4[3] ; + } ATTRIBUTE((packed)) rec_7; /* Components of the version number, from major to minor. */ int version_component[3]; @@ -837,7 +849,7 @@ buf_write (struct sfm_writer *w, const void *buf, size_t nbytes) static char * append_string_max (char *dest, const char *src, const char *end) { - int nbytes = min (end - dest, (int) strlen (src)); + int nbytes = MIN (end - dest, (int) strlen (src)); memcpy (dest, src, nbytes); return dest + nbytes; } @@ -870,7 +882,7 @@ sfm_write_case (struct sfm_writer *w, const struct ccase *c) w->case_cnt++; if (!w->needs_translation && !w->compress - && sizeof (flt64) == sizeof (union value)) + && sizeof (flt64) == sizeof (union value) && ! w->has_vls ) { /* Fast path: external and internal representations are the same and the dictionary is properly ordered. Write @@ -898,14 +910,23 @@ sfm_write_case (struct sfm_writer *w, const struct ccase *c) memset(bounce_cur, ' ', v->flt64_cnt * sizeof (flt64)); if (v->width == 0) - *bounce_cur = case_num (c, v->fv); - else { - buf_copy_rpad((char*)bounce_cur, v->flt64_cnt * sizeof (flt64), - case_data(c, v->fv)->s, - v->width); + *bounce_cur = case_num_idx (c, v->fv); + bounce_cur += v->flt64_cnt; } - bounce_cur += v->flt64_cnt; + else + { int ofs = 0; + while (ofs < v->width) + { + int chunk = MIN (MIN_VERY_LONG_STRING - 1, v->width - ofs); + int nv = DIV_RND_UP (chunk, sizeof (flt64)); + buf_copy_rpad ((char *) bounce_cur, nv * sizeof (flt64), + case_data_idx (c, v->fv)->s + ofs, chunk); + bounce_cur += nv; + ofs += chunk; + } + } + } if (!w->compress)