dictionary, to save space.
* dictionary.c (struct copy_map): Removed.
(struct dict_compactor): Removed.
(dict_make_compactor): Removed.
(dict_compactor_compact): Removed.
(dict_compactor_destroy): Removed.
* procedure.c (struct data_set): Change `compactor' member to be a
struct case_map *.
(proc_open): Use case_map_to_compact_dict instead of
dict_make_compactor.
(proc_casereader_read): Use case_map_execute instead of
dict_compactor_compact.
(proc_commit): Use case_map_destroy instead of
dict_compactor_destroy.
* scratch-writer.c (struct scratch_writer): Change `compactor'
member to be a struct case_map *.
(scratch_writer_open): Use case_map_to_compact_dict instead of
dict_make_compactor.
(scratch_writer_casewriter_write): Use case_map_execute instead of
dict_compactor_compact.
+2007-08-12 Ben Pfaff <blp@gnu.org>
+
+ Drop dict_compactor in favor of using the new struct case_map.
+
+ * dictionary.c (struct copy_map): Removed.
+ (struct dict_compactor): Removed.
+ (dict_make_compactor): Removed.
+ (dict_compactor_compact): Removed.
+ (dict_compactor_destroy): Removed.
+
+ * procedure.c (struct data_set): Change `compactor' member to be a
+ struct case_map *.
+ (proc_open): Use case_map_to_compact_dict instead of
+ dict_make_compactor.
+ (proc_casereader_read): Use case_map_execute instead of
+ dict_compactor_compact.
+ (proc_commit): Use case_map_destroy instead of
+ dict_compactor_destroy.
+
+ * scratch-writer.c (struct scratch_writer): Change `compactor'
+ member to be a struct case_map *.
+ (scratch_writer_open): Use case_map_to_compact_dict instead of
+ dict_make_compactor.
+ (scratch_writer_casewriter_write): Use case_map_execute instead of
+ dict_compactor_compact.
+
2007-08-12 Ben Pfaff <blp@gnu.org>
* automake.mk: Add case-map.c, case-map.h.
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <data/case-map.h>
+
+#include <stdio.h>
+
+#include <data/dictionary.h>
+#include <data/variable.h>
+#include <data/case.h>
+#include <libpspp/alloc.h>
+#include <libpspp/assertion.h>
+
+/* A case map: a table that tells case_map_execute() which value
+   of a source case to copy into each value of a destination
+   case. */
+struct case_map
+ {
+ size_t value_cnt; /* Number of values in map; also the number
+ of values in each case that
+ case_map_execute() creates. */
+ int *map; /* For each destination index, the
+ corresponding source index, or -1 if no
+ source value is assigned to it. */
+ };
+
+/* Allocates and returns a case map with room for N destination
+   values.  Every slot starts out as -1, meaning that no source
+   index has been assigned to it yet. */
+static struct case_map *
+create_case_map (size_t n)
+{
+  struct case_map *new_map = xmalloc (sizeof *new_map);
+  size_t slot;
+
+  new_map->value_cnt = n;
+  new_map->map = xnmalloc (n, sizeof *new_map->map);
+  for (slot = 0; slot < n; slot++)
+    new_map->map[slot] = -1;
+
+  return new_map;
+}
+
+/* Records in MAP that the CNT consecutive source values starting
+   at index FROM supply the CNT consecutive destination values
+   starting at index TO.  The destination range must lie within
+   MAP, and none of its slots may have been assigned already. */
+static void
+insert_mapping (struct case_map *map, size_t from, size_t to, size_t cnt)
+{
+  size_t i = 0;
+
+  assert (to + cnt <= map->value_cnt);
+  while (i < cnt)
+    {
+      /* Each destination slot may be claimed only once. */
+      assert (map->map[to + i] == -1);
+      map->map[to + i] = from + i;
+      i++;
+    }
+}
+
+/* Frees case map MAP along with its index table.  A null MAP is
+   accepted and ignored. */
+void
+case_map_destroy (struct case_map *map)
+{
+  if (map == NULL)
+    return;
+
+  free (map->map);
+  free (map);
+}
+
+/* Creates DST as a new case with as many values as MAP has, then
+   copies into each destination value the value of SRC that MAP
+   assigns to it.  Destination values with no assigned source
+   (marked -1 in MAP) are not written. */
+void
+case_map_execute (const struct case_map *map,
+                  const struct ccase *src, struct ccase *dst)
+{
+  const size_t n_values = map->value_cnt;
+  size_t to;
+
+  case_create (dst, n_values);
+  for (to = 0; to < n_values; to++)
+    {
+      int from = map->map[to];
+
+      if (from == -1)
+        continue;
+      *case_data_rw_idx (dst, to) = *case_data_idx (src, from);
+    }
+}
+
+/* Reports how many `union value's each case produced by MAP
+   contains. */
+size_t
+case_map_get_value_cnt (const struct case_map *map)
+{
+  size_t n_values = map->value_cnt;
+
+  return n_values;
+}
+
+/* Builds and returns a case_map that compacts cases belonging to
+   dictionary D.
+
+   Compaction squeezes out the "holes" between values and after
+   the last value.  (Deleting variables is what creates holes.)
+
+   Pass 0 for EXCLUDE_CLASSES to keep every variable.  Otherwise
+   it may combine any of (1u << DC_ORDINARY), (1u << DC_SYSTEM),
+   and (1u << DC_SCRATCH); variables in the named classes are
+   left out of the compacted case. */
+struct case_map *
+case_map_to_compact_dict (const struct dictionary *d,
+                          unsigned int exclude_classes)
+{
+  struct case_map *map;
+  size_t n_vars;
+  size_t value_idx = 0;
+  size_t var_idx;
+
+  assert ((exclude_classes & ~((1u << DC_ORDINARY)
+                               | (1u << DC_SYSTEM)
+                               | (1u << DC_SCRATCH))) == 0);
+
+  map = create_case_map (dict_count_values (d, exclude_classes));
+  n_vars = dict_get_var_cnt (d);
+  for (var_idx = 0; var_idx < n_vars; var_idx++)
+    {
+      struct variable *v = dict_get_var (d, var_idx);
+      enum dict_class class = dict_class_from_id (var_get_name (v));
+      size_t n_values;
+
+      /* Variables in an excluded class get no destination slots. */
+      if (exclude_classes & (1u << class))
+        continue;
+
+      /* Pack this variable's values directly after the previous
+         retained variable's values. */
+      n_values = var_get_value_cnt (v);
+      insert_mapping (map, var_get_case_index (v), value_idx, n_values);
+      value_idx += n_values;
+    }
+  assert (value_idx == map->value_cnt);
+
+  return map;
+}
+
+/* Gets dictionary D ready for a later call to
+   case_map_from_dict().  Once this returns, the caller is free
+   to delete, reorder, or rename D's variables before asking
+   case_map_from_dict() for the resulting case map.
+
+   Each variable's current case index is saved in its aux member,
+   so the aux members of D's variables must not be in use for
+   anything else. */
+void
+case_map_prepare_dict (const struct dictionary *d)
+{
+  const size_t n_vars = dict_get_var_cnt (d);
+  size_t idx;
+
+  for (idx = 0; idx < n_vars; idx++)
+    {
+      struct variable *var = dict_get_var (d, idx);
+      int *saved_index = xmalloc (sizeof *saved_index);
+
+      /* Remember where this variable's data lives right now. */
+      *saved_index = var_get_case_index (var);
+      var_attach_aux (var, saved_index, var_dtor_free);
+    }
+}
+
+/* Builds a case map from dictionary D, which must already have
+   been prepared with case_map_prepare_dict().
+
+   No reference to D is kept, and the aux members installed by
+   case_map_prepare_dict() are detached and freed.
+
+   Returns the new case map, or a null pointer when no mapping is
+   needed because no variable's case index has changed. */
+struct case_map *
+case_map_from_dict (const struct dictionary *d)
+{
+  size_t n_vars = dict_get_var_cnt (d);
+  struct case_map *result = create_case_map (dict_get_next_value_idx (d));
+  bool any_moved = false;
+  size_t idx;
+
+  for (idx = 0; idx < n_vars; idx++)
+    {
+      struct variable *var = dict_get_var (d, idx);
+      size_t n_values = var_get_value_cnt (var);
+      int *old_index = (int *) var_detach_aux (var);
+
+      if (var_get_case_index (var) != *old_index)
+        any_moved = true;
+
+      insert_mapping (result, *old_index, var_get_case_index (var),
+                      n_values);
+
+      free (old_index);
+    }
+
+  if (!any_moved)
+    {
+      /* Every variable is where it started, so no map is needed. */
+      case_map_destroy (result);
+      return NULL;
+    }
+
+  /* Trim unassigned slots off the end of the map. */
+  while (result->value_cnt > 0
+         && result->map[result->value_cnt - 1] == -1)
+    result->value_cnt--;
+
+  return result;
+}
+
+/* Builds and returns a case map that carries data from cases for
+   dictionary OLD to cases for dictionary NEW, pairing variables
+   by name.  Each variable in NEW must have a counterpart in OLD
+   with the same name, type, and width (the width match is
+   asserted here). */
+struct case_map *
+case_map_by_name (const struct dictionary *old,
+                  const struct dictionary *new)
+{
+  size_t n_new_vars = dict_get_var_cnt (new);
+  struct case_map *result = create_case_map (dict_get_next_value_idx (new));
+  size_t idx;
+
+  for (idx = 0; idx < n_new_vars; idx++)
+    {
+      struct variable *nv = dict_get_var (new, idx);
+      struct variable *ov = dict_lookup_var_assert (old, var_get_name (nv));
+
+      assert (var_get_width (nv) == var_get_width (ov));
+      insert_mapping (result,
+                      var_get_case_index (ov), var_get_case_index (nv),
+                      var_get_value_cnt (ov));
+    }
+
+  return result;
+}
+
+/* Prints the mapping represented by case map CM to stdout, for
+   debugging purposes.  Each output line has the form
+   "DST -> SRC", where SRC is -1 for a destination value that has
+   no source assigned. */
+void
+case_map_dump (const struct case_map *cm)
+{
+  /* The index is a size_t to match CM->value_cnt; an int index
+     would be compared as unsigned against it and could overflow
+     for large maps. */
+  size_t i;
+
+  for (i = 0; i < cm->value_cnt; i++)
+    printf ("%zu -> %d\n", i, cm->map[i]);
+}
--- /dev/null
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* Case map.
+
+ A case map copies data from a case that corresponds to one
+ dictionary to a case that corresponds to a second dictionary.
+ A few options are available for ways to create the mapping. */
+
+#ifndef DATA_CASE_MAP_H
+#define DATA_CASE_MAP_H 1
+
+#include <stddef.h>
+
+/* Only pointers to these types appear below, so forward
+   declarations are enough. */
+struct case_map;
+struct dictionary;
+struct ccase;
+
+/* NOTE(review): no definition of case_map_create() is visible in
+   the case-map.c added with this header (it only has the static
+   helper create_case_map (size_t)); confirm this prototype is
+   still wanted. */
+struct case_map *case_map_create (void);
+/* Frees a case map; a null pointer is accepted and ignored. */
+void case_map_destroy (struct case_map *);
+/* Creates the destination case (third argument) and fills it
+   from the source case (second argument) according to the
+   map. */
+void case_map_execute (const struct case_map *,
+ const struct ccase *, struct ccase *);
+
+/* Returns the number of values in cases created by the map. */
+size_t case_map_get_value_cnt (const struct case_map *);
+
+/* For mapping cases for one version of a dictionary to those in
+   a modified version of the same dictionary.  Call
+   case_map_prepare_dict() before modifying the dictionary and
+   case_map_from_dict() afterward; the latter returns a null
+   pointer if no variable's case index changed. */
+void case_map_prepare_dict (const struct dictionary *);
+struct case_map *case_map_from_dict (const struct dictionary *);
+
+/* For eliminating "holes" in a case.  EXCLUDE_CLASSES is 0 or a
+   combination of (1u << DC_ORDINARY), (1u << DC_SYSTEM), and
+   (1u << DC_SCRATCH) naming variable classes to leave out. */
+struct case_map *case_map_to_compact_dict (const struct dictionary *d,
+ unsigned int exclude_classes);
+
+/* For mapping cases for one dictionary to another based on
+   variable names within the dictionary. */
+struct case_map *case_map_by_name (const struct dictionary *old,
+ const struct dictionary *new);
+
+/* Prints the map to stdout, for debugging. */
+void case_map_dump (const struct case_map *);
+
+#endif /* data/case-map.h */
return cnt;
}
\f
-/* How to copy a contiguous range of values between cases. */
-struct copy_map
- {
- size_t src_idx; /* Starting value index in source case. */
- size_t dst_idx; /* Starting value index in target case. */
- size_t cnt; /* Number of values. */
- };
-
-/* How to compact a case. */
-struct dict_compactor
- {
- struct copy_map *maps; /* Array of mappings. */
- size_t map_cnt; /* Number of mappings. */
- };
-
-/* Creates and returns a dict_compactor that can be used to
- compact cases for dictionary D.
-
- Compacting a case eliminates "holes" between values and after
- the last value. (Holes are created by deleting variables.)
-
- All variables are compacted if EXCLUDE_CLASSES is 0, or it may
- contain one or more of (1u << DC_ORDINARY), (1u << DC_SYSTEM),
- or (1u << DC_SCRATCH) to cause the corresponding type of
- variable to be deleted during compaction. */
-struct dict_compactor *
-dict_make_compactor (const struct dictionary *d, unsigned int exclude_classes)
-{
- struct dict_compactor *compactor;
- struct copy_map *map;
- size_t map_allocated;
- size_t value_idx;
- size_t i;
-
- assert ((exclude_classes & ~((1u << DC_ORDINARY)
- | (1u << DC_SYSTEM)
- | (1u << DC_SCRATCH))) == 0);
-
- compactor = xmalloc (sizeof *compactor);
- compactor->maps = NULL;
- compactor->map_cnt = 0;
- map_allocated = 0;
-
- value_idx = 0;
- map = NULL;
- for (i = 0; i < d->var_cnt; i++)
- {
- struct variable *v = d->var[i];
- enum dict_class class = dict_class_from_id (var_get_name (v));
- if (exclude_classes & (1u << class))
- continue;
-
- if (map != NULL && map->src_idx + map->cnt == var_get_case_index (v))
- map->cnt += var_get_value_cnt (v);
- else
- {
- if (compactor->map_cnt == map_allocated)
- compactor->maps = x2nrealloc (compactor->maps, &map_allocated,
- sizeof *compactor->maps);
- map = &compactor->maps[compactor->map_cnt++];
- map->src_idx = var_get_case_index (v);
- map->dst_idx = value_idx;
- map->cnt = var_get_value_cnt (v);
- }
- value_idx += var_get_value_cnt (v);
- }
-
- return compactor;
-}
-
-/* Compacts SRC by copying it to DST according to the scheme in
- COMPACTOR.
-
- Compacting a case eliminates "holes" between values and after
- the last value. (Holes are created by deleting variables.) */
-void
-dict_compactor_compact (const struct dict_compactor *compactor,
- struct ccase *dst, const struct ccase *src)
-{
- size_t i;
-
- for (i = 0; i < compactor->map_cnt; i++)
- {
- const struct copy_map *map = &compactor->maps[i];
- case_copy (dst, map->dst_idx, src, map->src_idx, map->cnt);
- }
-}
-
-/* Destroys COMPACTOR. */
-void
-dict_compactor_destroy (struct dict_compactor *compactor)
-{
- if (compactor != NULL)
- {
- free (compactor->maps);
- free (compactor);
- }
-}
-
/* Returns the SPLIT FILE vars (see cmd_split_file()). Call
dict_get_split_cnt() to determine how many SPLIT FILE vars
there are. Returns a null pointer if and only if there are no
unsigned int exclude_classes);
void dict_compact_values (struct dictionary *);
-struct dict_compactor *dict_make_compactor (const struct dictionary *,
- unsigned int exclude_classes);
-void dict_compactor_compact (const struct dict_compactor *,
- struct ccase *, const struct ccase *);
-void dict_compactor_destroy (struct dict_compactor *);
-
const struct variable *const *dict_get_split_vars (const struct dictionary *);
size_t dict_get_split_cnt (const struct dictionary *);
void dict_set_split_vars (struct dictionary *,
#include <unistd.h>
#include <data/case.h>
+#include <data/case-map.h>
#include <data/caseinit.h>
#include <data/casereader.h>
#include <data/casereader-provider.h>
added to. */
struct trns_chain *cur_trns_chain;
- /* The compactor used to compact a case, if necessary;
+ /* The case map used to compact a case, if necessary;
otherwise a null pointer. */
- struct dict_compactor *compactor;
+ struct case_map *compactor;
/* Time at which proc was last invoked. */
time_t last_proc_invocation;
size_t compacted_value_cnt = dict_count_values (pd, 1u << DC_SCRATCH);
bool should_compact = compacted_value_cnt < dict_get_next_value_idx (pd);
ds->compactor = (should_compact
- ? dict_make_compactor (pd, 1u << DC_SCRATCH)
+ ? case_map_to_compact_dict (pd, 1u << DC_SCRATCH)
: NULL);
ds->sink = autopaging_writer_create (compacted_value_cnt);
}
{
struct ccase tmp;
if (ds->compactor != NULL)
- {
- case_create (&tmp, casewriter_get_value_cnt (ds->sink));
- dict_compactor_compact (ds->compactor, &tmp, c);
- }
+ case_map_execute (ds->compactor, c, &tmp);
else
case_clone (&tmp, c);
casewriter_write (ds->sink, &tmp);
/* Finish compacting. */
if (ds->compactor != NULL)
{
- dict_compactor_destroy (ds->compactor);
+ case_map_destroy (ds->compactor);
ds->compactor = NULL;
dict_delete_scratch_vars (ds->dict);
#include <stdlib.h>
#include <data/case.h>
+#include <data/case-map.h>
#include <data/casereader.h>
#include <data/casewriter-provider.h>
#include <data/casewriter.h>
{
struct scratch_handle *handle; /* Underlying scratch handle. */
struct file_handle *fh; /* Underlying file handle. */
- struct dict_compactor *compactor; /* Compacts into handle->dictionary. */
+ struct case_map *compactor; /* Compacts into handle->dictionary. */
struct casewriter *subwriter; /* Data output. */
};
struct scratch_handle *sh;
struct scratch_writer *writer;
struct dictionary *scratch_dict;
- struct dict_compactor *compactor;
+ struct case_map *compactor;
struct casewriter *casewriter;
size_t dict_value_cnt;
if (dict_count_values (scratch_dict, 0)
< dict_get_next_value_idx (scratch_dict))
{
- compactor = dict_make_compactor (scratch_dict, 0);
+ compactor = case_map_to_compact_dict (scratch_dict, 0);
dict_compact_values (scratch_dict);
}
else
struct ccase *c)
{
struct scratch_writer *writer = writer_;
- struct scratch_handle *handle = writer->handle;
struct ccase tmp;
if (writer->compactor)
{
- case_create (&tmp, dict_get_next_value_idx (handle->dictionary));
- dict_compactor_compact (writer->compactor, &tmp, c);
+ case_map_execute (writer->compactor, c, &tmp);
case_destroy (c);
}
else
+2007-08-12 Ben Pfaff <blp@gnu.org>
+
+ * get.c (parse_read_command): Compact the values in the target
+ dictionary, to save space.
+
2007-08-12 Ben Pfaff <blp@gnu.org>
* get.c (struct case_map): Move into new file src/data/case-map.c.
if (!parse_dict_trim (lexer, dict))
goto error;
}
+ dict_compact_values (dict);
map = case_map_from_dict (dict);
if (map != NULL)