X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-writer.c;h=9586cbc2e38e503c75ede6be7ea754bd93d80a11;hb=dc29c57e7908b0bdc6ab84f8aa4b7cb8a780026c;hp=5ebf3ede0b97d79cc9dbc65a01ed2fdbadbf130e;hpb=bb0bebf8612a24fc5b58d0a85ff90ddb6d3f9e7a;p=pspp diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index 5ebf3ede0b..9586cbc2e3 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -98,9 +98,10 @@ static const struct casewriter_class sys_file_casewriter_class; static void write_header (struct sfm_writer *, const struct dictionary *); static void write_variable (struct sfm_writer *, const struct variable *); -static void write_value_labels (struct sfm_writer *, struct variable *, - int idx); -static void write_integer_info_record (struct sfm_writer *); +static void write_value_labels (struct sfm_writer *, + const struct dictionary *); +static void write_integer_info_record (struct sfm_writer *, + const struct dictionary *); static void write_float_info_record (struct sfm_writer *); static void write_longvar_table (struct sfm_writer *w, @@ -177,7 +178,6 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, { struct sfm_writer *w; mode_t mode; - int idx; int i; /* Check version. */ @@ -235,20 +235,12 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, for (i = 0; i < dict_get_var_cnt (d); i++) write_variable (w, dict_get_var (d, i)); - /* Write out value labels. */ - idx = 0; - for (i = 0; i < dict_get_var_cnt (d); i++) - { - struct variable *v = dict_get_var (d, i); - - write_value_labels (w, v, idx); - idx += sfm_width_to_octs (var_get_width (v)); - } + write_value_labels (w, d); if (dict_get_document_line_cnt (d) > 0) write_documents (w, d); - write_integer_info_record (w); + write_integer_info_record (w, d); write_float_info_record (w); write_mrsets (w, d, true); @@ -514,47 +506,117 @@ write_variable (struct sfm_writer *w, const struct variable *v) mv_destroy (&mv); } -/* Writes the value labels for variable V having system file - variable index IDX to system file W. +/* Writes the value labels to system file W. Value labels for long string variables are written separately, by write_long_string_value_labels. */ static void -write_value_labels (struct sfm_writer *w, struct variable *v, int idx) +write_value_labels (struct sfm_writer *w, const struct dictionary *d) { - const struct val_labs *val_labs; - const struct val_lab **labels; - size_t n_labels; + struct label_set + { + struct hmap_node hmap_node; + const struct val_labs *val_labs; + int *indexes; + size_t n_indexes, allocated_indexes; + }; + + size_t n_sets, allocated_sets; + struct label_set **sets; + struct hmap same_sets; size_t i; + int idx; - val_labs = var_get_value_labels (v); - n_labels = val_labs_count (val_labs); - if (n_labels == 0 || var_get_width (v) > 8) - return; + n_sets = allocated_sets = 0; + sets = NULL; + hmap_init (&same_sets); - /* Value label record. */ - write_int (w, 3); /* Record type. */ - write_int (w, val_labs_count (val_labs)); - labels = val_labs_sorted (val_labs); - for (i = 0; i < n_labels; i++) + idx = 0; + for (i = 0; i < dict_get_var_cnt (d); i++) { - const struct val_lab *vl = labels[i]; - char *label = recode_string (var_get_encoding (v), UTF8, - val_lab_get_escaped_label (vl), -1); - uint8_t len = MIN (strlen (label), 255); - - write_value (w, val_lab_get_value (vl), var_get_width (v)); - write_bytes (w, &len, 1); - write_bytes (w, label, len); - write_zeros (w, REM_RND_UP (len + 1, 8)); - free (label); + struct variable *v = dict_get_var (d, i); + + if (var_has_value_labels (v) && var_get_width (v) <= 8) + { + const struct val_labs *val_labs = var_get_value_labels (v); + unsigned int hash = val_labs_hash (val_labs, 0); + struct label_set *set; + + HMAP_FOR_EACH_WITH_HASH (set, struct label_set, hmap_node, + hash, &same_sets) + { + if (val_labs_equal (set->val_labs, val_labs)) + { + if (set->n_indexes >= set->allocated_indexes) + set->indexes = x2nrealloc (set->indexes, + &set->allocated_indexes, + sizeof *set->indexes); + set->indexes[set->n_indexes++] = idx; + goto next_var; + } + } + + set = xmalloc (sizeof *set); + set->val_labs = val_labs; + set->indexes = xmalloc (sizeof *set->indexes); + set->indexes[0] = idx; + set->n_indexes = 1; + set->allocated_indexes = 1; + hmap_insert (&same_sets, &set->hmap_node, hash); + + if (n_sets >= allocated_sets) + sets = x2nrealloc (sets, &allocated_sets, sizeof *sets); + sets[n_sets++] = set; + } + + next_var: + idx += sfm_width_to_octs (var_get_width (v)); + } + + for (i = 0; i < n_sets; i++) + { + const struct label_set *set = sets[i]; + const struct val_labs *val_labs = set->val_labs; + size_t n_labels = val_labs_count (val_labs); + int width = val_labs_get_width (val_labs); + const struct val_lab **labels; + size_t j; + + /* Value label record. */ + write_int (w, 3); /* Record type. */ + write_int (w, n_labels); + labels = val_labs_sorted (val_labs); + for (j = 0; j < n_labels; j++) + { + const struct val_lab *vl = labels[j]; + char *label = recode_string (dict_get_encoding (d), UTF8, + val_lab_get_escaped_label (vl), -1); + uint8_t len = MIN (strlen (label), 255); + + write_value (w, val_lab_get_value (vl), width); + write_bytes (w, &len, 1); + write_bytes (w, label, len); + write_zeros (w, REM_RND_UP (len + 1, 8)); + free (label); + } + free (labels); + + /* Value label variable record. */ + write_int (w, 4); /* Record type. */ + write_int (w, set->n_indexes); + for (j = 0; j < set->n_indexes; j++) + write_int (w, set->indexes[j] + 1); } - free (labels); - /* Value label variable record. */ - write_int (w, 4); /* Record type. */ - write_int (w, 1); /* Number of variables. */ - write_int (w, idx + 1); /* Variable's dictionary index. */ + for (i = 0; i < n_sets; i++) + { + struct label_set *set = sets[i]; + + free (set->indexes); + free (set); + } + free (sets); + hmap_destroy (&same_sets); } /* Writes record type 6, document record. */ @@ -699,13 +761,15 @@ write_mrsets (struct sfm_writer *w, const struct dictionary *dict, const char *short_name_utf8 = var_get_short_name (mrset->vars[j], 0); char *short_name = recode_string (encoding, "UTF-8", short_name_utf8, -1); + str_lowercase (short_name); ds_put_format (&s, " %s", short_name); free (short_name); } ds_put_byte (&s, '\n'); } - write_string_record (w, ds_ss (&s), pre_v14 ? 7 : 19); + if (!ds_is_empty (&s)) + write_string_record (w, ds_ss (&s), pre_v14 ? 7 : 19); ds_destroy (&s); } @@ -885,10 +949,12 @@ write_longvar_table (struct sfm_writer *w, const struct dictionary *dict) /* Write integer information record. */ static void -write_integer_info_record (struct sfm_writer *w) +write_integer_info_record (struct sfm_writer *w, + const struct dictionary *d) { int version_component[3]; int float_format; + int codepage; /* Parse the version string. */ memset (version_component, 0, sizeof version_component); @@ -906,6 +972,16 @@ write_integer_info_record (struct sfm_writer *w) else abort (); + /* Choose codepage. */ + codepage = sys_get_codepage_from_encoding (dict_get_encoding (d)); + if (codepage == 0) + { + /* Default to "7-bit ASCII" if the codepage number is unknown, because + many files use this codepage number regardless of their actual + encoding. */ + codepage = 2; + } + /* Write record. */ write_int (w, 7); /* Record type. */ write_int (w, 3); /* Record subtype. */ @@ -918,7 +994,7 @@ write_integer_info_record (struct sfm_writer *w) write_int (w, float_format); write_int (w, 1); /* Compression code. */ write_int (w, INTEGER_NATIVE == INTEGER_MSB_FIRST ? 1 : 2); - write_int (w, 2); /* 7-bit ASCII. */ + write_int (w, codepage); } /* Write floating-point information record. */