From: Ben Pfaff Date: Mon, 13 Aug 2007 04:23:28 +0000 (+0000) Subject: * get.c (parse_read_command): Compact the values in the target X-Git-Tag: v0.6.0~312 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;ds=sidebyside;h=9db3101d0bbbcfb687acd3e442e550557e4e56b1;p=pspp-builds.git * get.c (parse_read_command): Compact the values in the target dictionary, to save space. * dictionary.c (struct copy_map): Removed. (struct dict_compactor): Removed. (dict_make_compactor): Removed. (dict_compactor_compact): Removed. (dict_compactor_destroy): Removed. * procedure.c (struct data_set): Change `compactor' member to be a struct case_map *. (proc_open): Use case_map_to_compact_dict instead of dict_make_compactor. (proc_casereader_read): Use case_map_execute instead of dict_compactor_compact. (proc_commit): Use case_map_destroy instead of dict_compactor_destroy. * scratch-writer.c (struct scratch_writer): Change `compactor' member to be a struct case_map *. (scratch_writer_open): Use case_map_to_compact_dict instead of dict_make_compactor. (scratch_writer_casewriter_write): Use case_map_execute instead of dict_compactor_compact. --- diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 12053de2..e32441d1 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,29 @@ +2007-08-12 Ben Pfaff + + Drop dict_compactor in favor of using the new struct case_map. + + * dictionary.c (struct copy_map): Removed. + (struct dict_compactor): Removed. + (dict_make_compactor): Removed. + (dict_compactor_compact): Removed. + (dict_compactor_destroy): Removed. + + * procedure.c (struct data_set): Change `compactor' member to be a + struct case_map *. + (proc_open): Use case_map_to_compact_dict instead of + dict_make_compactor. + (proc_casereader_read): Use case_map_execute instead of + dict_compactor_compact. + (proc_commit): Use case_map_destroy instead of + dict_compactor_destroy. 
+ + * scratch-writer.c (struct scratch_writer): Change `compactor' + member to be a struct case_map *. + (scratch_writer_open): Use case_map_to_compact_dict instead of + dict_make_compactor. + (scratch_writer_casewriter_write): Use case_map_execute instead of + dict_compactor_compact. +o 2007-08-12 Ben Pfaff * automake.mk: Add case-map.c, case-map.h. diff --git a/src/data/case-map.c b/src/data/case-map.c new file mode 100644 index 00000000..a1a65cc1 --- /dev/null +++ b/src/data/case-map.c @@ -0,0 +1,242 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include + +#include + +#include +#include +#include +#include +#include + +/* A case map. */ +struct case_map + { + size_t value_cnt; /* Number of values in map. */ + int *map; /* For each destination index, the + corresponding source index. */ + }; + +/* Creates and returns an empty map. */ +static struct case_map * +create_case_map (size_t n) +{ + struct case_map *map; + size_t i; + + map = xmalloc (sizeof *map); + map->value_cnt = n; + map->map = xnmalloc (n, sizeof *map->map); + for (i = 0; i < map->value_cnt; i++) + map->map[i] = -1; + + return map; +} + +/* Inserts into MAP a mapping of the CNT values starting at FROM + to the CNT values starting at TO. 
*/ +static void +insert_mapping (struct case_map *map, size_t from, size_t to, size_t cnt) +{ + size_t i; + + assert (to + cnt <= map->value_cnt); + for (i = 0; i < cnt; i++) + { + assert (map->map[to + i] == -1); + map->map[to + i] = from + i; + } +} + +/* Destroys case map MAP. */ +void +case_map_destroy (struct case_map *map) +{ + if (map != NULL) + { + free (map->map); + free (map); + } +} + +/* Maps from SRC to DST, applying case map MAP. */ +void +case_map_execute (const struct case_map *map, + const struct ccase *src, struct ccase *dst) +{ + size_t dst_idx; + + case_create (dst, map->value_cnt); + for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++) + { + int src_idx = map->map[dst_idx]; + if (src_idx != -1) + *case_data_rw_idx (dst, dst_idx) = *case_data_idx (src, src_idx); + } +} + +/* Returns the number of `union value's in cases created by + MAP. */ +size_t +case_map_get_value_cnt (const struct case_map *map) +{ + return map->value_cnt; +} + +/* Creates and returns a case_map that can be used to compact + cases for dictionary D. + + Compacting a case eliminates "holes" between values and after + the last value. (Holes are created by deleting variables.) + + All variables are compacted if EXCLUDE_CLASSES is 0, or it may + contain one or more of (1u << DC_ORDINARY), (1u << DC_SYSTEM), + or (1u << DC_SCRATCH) to cause the corresponding type of + variable to be deleted during compaction. 
*/ +struct case_map * +case_map_to_compact_dict (const struct dictionary *d, + unsigned int exclude_classes) +{ + size_t var_cnt; + struct case_map *map; + size_t value_idx; + size_t i; + + assert ((exclude_classes & ~((1u << DC_ORDINARY) + | (1u << DC_SYSTEM) + | (1u << DC_SCRATCH))) == 0); + + map = create_case_map (dict_count_values (d, exclude_classes)); + var_cnt = dict_get_var_cnt (d); + value_idx = 0; + for (i = 0; i < var_cnt; i++) + { + struct variable *v = dict_get_var (d, i); + enum dict_class class = dict_class_from_id (var_get_name (v)); + + if (!(exclude_classes & (1u << class))) + { + size_t value_cnt = var_get_value_cnt (v); + insert_mapping (map, var_get_case_index (v), value_idx, value_cnt); + value_idx += value_cnt; + } + } + assert (value_idx == map->value_cnt); + + return map; +} + +/* Prepares dictionary D for producing a case map. Afterward, + the caller may delete, reorder, or rename variables within D + at will before using case_map_from_dict() to produce the case + map. + + Uses D's aux members, which must otherwise not be in use. */ +void +case_map_prepare_dict (const struct dictionary *d) +{ + size_t var_cnt = dict_get_var_cnt (d); + size_t i; + + for (i = 0; i < var_cnt; i++) + { + struct variable *v = dict_get_var (d, i); + int *src_fv = xmalloc (sizeof *src_fv); + *src_fv = var_get_case_index (v); + var_attach_aux (v, src_fv, var_dtor_free); + } +} + +/* Produces a case map from dictionary D, which must have been + previously prepared with case_map_prepare_dict(). + + Does not retain any reference to D, and clears the aux members + set up by case_map_prepare_dict(). + + Returns the new case map, or a null pointer if no mapping is + required (that is, no data has changed position). 
*/ +struct case_map * +case_map_from_dict (const struct dictionary *d) +{ + struct case_map *map; + size_t var_cnt = dict_get_var_cnt (d); + size_t i; + bool identity_map = true; + + map = create_case_map (dict_get_next_value_idx (d)); + for (i = 0; i < var_cnt; i++) + { + struct variable *v = dict_get_var (d, i); + size_t value_cnt = var_get_value_cnt (v); + int *src_fv = (int *) var_detach_aux (v); + + if (var_get_case_index (v) != *src_fv) + identity_map = false; + + insert_mapping (map, *src_fv, var_get_case_index (v), value_cnt); + + free (src_fv); + } + + if (identity_map) + { + case_map_destroy (map); + return NULL; + } + + while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1) + map->value_cnt--; + + return map; +} + +/* Creates and returns a case map for mapping variables in OLD to + variables in NEW based on their name. For every variable in + NEW, there must be a variable in OLD with the same name, type, + and width. */ +struct case_map * +case_map_by_name (const struct dictionary *old, + const struct dictionary *new) +{ + struct case_map *map; + size_t var_cnt = dict_get_var_cnt (new); + size_t i; + + map = create_case_map (dict_get_next_value_idx (new)); + for (i = 0; i < var_cnt; i++) + { + struct variable *nv = dict_get_var (new, i); + struct variable *ov = dict_lookup_var_assert (old, var_get_name (nv)); + assert (var_get_width (nv) == var_get_width (ov)); + insert_mapping (map, var_get_case_index (ov), var_get_case_index (nv), + var_get_value_cnt (ov)); + } + return map; +} + +/* Prints the mapping represented by case map CM to stdout, for + debugging purposes. */ +void +case_map_dump (const struct case_map *cm) +{ + int i; + for (i = 0 ; i < cm->value_cnt; ++i ) + printf ("%d -> %d\n", i, cm->map[i]); +} diff --git a/src/data/case-map.h b/src/data/case-map.h new file mode 100644 index 00000000..86d448ec --- /dev/null +++ b/src/data/case-map.h @@ -0,0 +1,56 @@ +/* PSPP - a program for statistical analysis. 
+ Copyright (C) 2007 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* Case map. + + A case map copies data from a case that corresponds to one + dictionary to a case that corresponds to a second dictionary. + A few options are available for ways to create the mapping. */ + +#ifndef DATA_CASE_MAP_H +#define DATA_CASE_MAP_H 1 + +#include + +struct case_map; +struct dictionary; +struct ccase; + +struct case_map *case_map_create (void); +void case_map_destroy (struct case_map *); +void case_map_execute (const struct case_map *, + const struct ccase *, struct ccase *); + +size_t case_map_get_value_cnt (const struct case_map *); + +/* For mapping cases for one version of a dictionary to those in + a modified version of the same dictionary. */ +void case_map_prepare_dict (const struct dictionary *); +struct case_map *case_map_from_dict (const struct dictionary *); + +/* For eliminating "holes" in a case. */ +struct case_map *case_map_to_compact_dict (const struct dictionary *d, + unsigned int exclude_classes); + +/* For mapping cases for one dictionary to another based on + variable names within the dictionary. 
*/ +struct case_map *case_map_by_name (const struct dictionary *old, + const struct dictionary *new); + +void case_map_dump (const struct case_map *); + +#endif /* data/case-map.h */ diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 7964ad55..9589a72f 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -896,105 +896,6 @@ dict_count_values (const struct dictionary *d, unsigned int exclude_classes) return cnt; } -/* How to copy a contiguous range of values between cases. */ -struct copy_map - { - size_t src_idx; /* Starting value index in source case. */ - size_t dst_idx; /* Starting value index in target case. */ - size_t cnt; /* Number of values. */ - }; - -/* How to compact a case. */ -struct dict_compactor - { - struct copy_map *maps; /* Array of mappings. */ - size_t map_cnt; /* Number of mappings. */ - }; - -/* Creates and returns a dict_compactor that can be used to - compact cases for dictionary D. - - Compacting a case eliminates "holes" between values and after - the last value. (Holes are created by deleting variables.) - - All variables are compacted if EXCLUDE_CLASSES is 0, or it may - contain one or more of (1u << DC_ORDINARY), (1u << DC_SYSTEM), - or (1u << DC_SCRATCH) to cause the corresponding type of - variable to be deleted during compaction. 
*/ -struct dict_compactor * -dict_make_compactor (const struct dictionary *d, unsigned int exclude_classes) -{ - struct dict_compactor *compactor; - struct copy_map *map; - size_t map_allocated; - size_t value_idx; - size_t i; - - assert ((exclude_classes & ~((1u << DC_ORDINARY) - | (1u << DC_SYSTEM) - | (1u << DC_SCRATCH))) == 0); - - compactor = xmalloc (sizeof *compactor); - compactor->maps = NULL; - compactor->map_cnt = 0; - map_allocated = 0; - - value_idx = 0; - map = NULL; - for (i = 0; i < d->var_cnt; i++) - { - struct variable *v = d->var[i]; - enum dict_class class = dict_class_from_id (var_get_name (v)); - if (exclude_classes & (1u << class)) - continue; - - if (map != NULL && map->src_idx + map->cnt == var_get_case_index (v)) - map->cnt += var_get_value_cnt (v); - else - { - if (compactor->map_cnt == map_allocated) - compactor->maps = x2nrealloc (compactor->maps, &map_allocated, - sizeof *compactor->maps); - map = &compactor->maps[compactor->map_cnt++]; - map->src_idx = var_get_case_index (v); - map->dst_idx = value_idx; - map->cnt = var_get_value_cnt (v); - } - value_idx += var_get_value_cnt (v); - } - - return compactor; -} - -/* Compacts SRC by copying it to DST according to the scheme in - COMPACTOR. - - Compacting a case eliminates "holes" between values and after - the last value. (Holes are created by deleting variables.) */ -void -dict_compactor_compact (const struct dict_compactor *compactor, - struct ccase *dst, const struct ccase *src) -{ - size_t i; - - for (i = 0; i < compactor->map_cnt; i++) - { - const struct copy_map *map = &compactor->maps[i]; - case_copy (dst, map->dst_idx, src, map->src_idx, map->cnt); - } -} - -/* Destroys COMPACTOR. */ -void -dict_compactor_destroy (struct dict_compactor *compactor) -{ - if (compactor != NULL) - { - free (compactor->maps); - free (compactor); - } -} - /* Returns the SPLIT FILE vars (see cmd_split_file()). Call dict_get_split_cnt() to determine how many SPLIT FILE vars there are. 
Returns a null pointer if and only if there are no diff --git a/src/data/dictionary.h b/src/data/dictionary.h index 1cde778e..9990410a 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -106,12 +106,6 @@ size_t dict_count_values (const struct dictionary *, unsigned int exclude_classes); void dict_compact_values (struct dictionary *); -struct dict_compactor *dict_make_compactor (const struct dictionary *, - unsigned int exclude_classes); -void dict_compactor_compact (const struct dict_compactor *, - struct ccase *, const struct ccase *); -void dict_compactor_destroy (struct dict_compactor *); - const struct variable *const *dict_get_split_vars (const struct dictionary *); size_t dict_get_split_cnt (const struct dictionary *); void dict_set_split_vars (struct dictionary *, diff --git a/src/data/procedure.c b/src/data/procedure.c index 60be6b47..6a891c9b 100644 --- a/src/data/procedure.c +++ b/src/data/procedure.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -75,9 +76,9 @@ struct dataset { added to. */ struct trns_chain *cur_trns_chain; - /* The compactor used to compact a case, if necessary; + /* The case map used to compact a case, if necessary; otherwise a null pointer. */ - struct dict_compactor *compactor; + struct case_map *compactor; /* Time at which proc was last invoked. */ time_t last_proc_invocation; @@ -174,7 +175,7 @@ proc_open (struct dataset *ds) size_t compacted_value_cnt = dict_count_values (pd, 1u << DC_SCRATCH); bool should_compact = compacted_value_cnt < dict_get_next_value_idx (pd); ds->compactor = (should_compact - ? dict_make_compactor (pd, 1u << DC_SCRATCH) + ? 
case_map_to_compact_dict (pd, 1u << DC_SCRATCH) : NULL); ds->sink = autopaging_writer_create (compacted_value_cnt); } @@ -258,10 +259,7 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_, { struct ccase tmp; if (ds->compactor != NULL) - { - case_create (&tmp, casewriter_get_value_cnt (ds->sink)); - dict_compactor_compact (ds->compactor, &tmp, c); - } + case_map_execute (ds->compactor, c, &tmp); else case_clone (&tmp, c); casewriter_write (ds->sink, &tmp); @@ -326,7 +324,7 @@ proc_commit (struct dataset *ds) /* Finish compacting. */ if (ds->compactor != NULL) { - dict_compactor_destroy (ds->compactor); + case_map_destroy (ds->compactor); ds->compactor = NULL; dict_delete_scratch_vars (ds->dict); diff --git a/src/data/scratch-writer.c b/src/data/scratch-writer.c index c71608d3..4e2929a2 100644 --- a/src/data/scratch-writer.c +++ b/src/data/scratch-writer.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -38,7 +39,7 @@ struct scratch_writer { struct scratch_handle *handle; /* Underlying scratch handle. */ struct file_handle *fh; /* Underlying file handle. */ - struct dict_compactor *compactor; /* Compacts into handle->dictionary. */ + struct case_map *compactor; /* Compacts into handle->dictionary. */ struct casewriter *subwriter; /* Data output. 
*/ }; @@ -55,7 +56,7 @@ scratch_writer_open (struct file_handle *fh, struct scratch_handle *sh; struct scratch_writer *writer; struct dictionary *scratch_dict; - struct dict_compactor *compactor; + struct case_map *compactor; struct casewriter *casewriter; size_t dict_value_cnt; @@ -73,7 +74,7 @@ scratch_writer_open (struct file_handle *fh, if (dict_count_values (scratch_dict, 0) < dict_get_next_value_idx (scratch_dict)) { - compactor = dict_make_compactor (scratch_dict, 0); + compactor = case_map_to_compact_dict (scratch_dict, 0); dict_compact_values (scratch_dict); } else @@ -106,12 +107,10 @@ scratch_writer_casewriter_write (struct casewriter *w UNUSED, void *writer_, struct ccase *c) { struct scratch_writer *writer = writer_; - struct scratch_handle *handle = writer->handle; struct ccase tmp; if (writer->compactor) { - case_create (&tmp, dict_get_next_value_idx (handle->dictionary)); - dict_compactor_compact (writer->compactor, &tmp, c); + case_map_execute (writer->compactor, c, &tmp); case_destroy (c); } else diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog index 608ee269..8eece80b 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,8 @@ +2007-08-12 Ben Pfaff + + * get.c (parse_read_command): Compact the values in the target + dictionary, to save space. + 2007-08-12 Ben Pfaff * get.c (struct case_map): Move into new file src/data/case-map.c. diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index 8a8d7859..22ae6feb 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -121,6 +121,7 @@ parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command if (!parse_dict_trim (lexer, dict)) goto error; } + dict_compact_values (dict); map = case_map_from_dict (dict); if (map != NULL)