From: Ben Pfaff Date: Mon, 13 Aug 2007 03:44:45 +0000 (+0000) Subject: * psppire-dict.c (psppire_dict_dump): Don't use X-Git-Tag: v0.6.0~314 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6e352378b703f57313fdd0f628b99b19ff25d055;p=pspp-builds.git * psppire-dict.c (psppire_dict_dump): Don't use dict_get_compacted_dict_index_to_case_index, as that function has been deleted. * flip.c: Drop use of dict_get_compacted_dict_index_to_case_index and just use the ordinary case indexes. There seemed to be no reason for the former method. * get.c (case_map_get_value_cnt): New function. * dictionary.c (dict_compact_values): Don't delete scratch variables as well as compacting case indexes. Update all callers. (dict_get_compacted_value_cnt): Rename dict_count_values and change interface. Update all callers. (dict_get_compacted_value_cnt): Remove. (dict_compacting_would_shrink): Remove. (dict_compacting_would_change): Remove. (dict_make_compactor): Add new parameter. Update all callers. * procedure.c (proc_casereader_read): Use casewriter_get_value_cnt instead of dict_count_values, changing an O(N) operation into O(1). --- diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 176a6fc0..8b3e97cb 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,18 @@ +2007-08-12 Ben Pfaff + + * dictionary.c (dict_compact_values): Don't delete scratch + variables as well as compacting case indexes. Update all callers. + (dict_get_compacted_value_cnt): Rename dict_count_values and + change interface. Update all callers. + (dict_get_compacted_value_cnt): Remove. + (dict_compacting_would_shrink): Remove. + (dict_compacting_would_change): Remove. + (dict_make_compactor): Add new parameter. Update all callers. + + * procedure.c (proc_casereader_read): Use casewriter_get_value_cnt + instead of dict_count_values, changing an O(N) operation into + O(1). 
+ 2007-08-12 Ben Pfaff * casereader.c (casereader_read): Don't require cases read by a diff --git a/src/data/dictionary.c b/src/data/dictionary.c index cee678e4..7964ad55 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -824,30 +824,22 @@ dict_get_case_size (const struct dictionary *d) return sizeof (union value) * dict_get_next_value_idx (d); } -/* Deletes scratch variables in dictionary D and reassigns values - so that fragmentation is eliminated. */ +/* Reassigns values in dictionary D so that fragmentation is + eliminated. */ void dict_compact_values (struct dictionary *d) { size_t i; d->next_value_idx = 0; - for (i = 0; i < d->var_cnt; ) + for (i = 0; i < d->var_cnt; i++) { struct variable *v = d->var[i]; - - if (dict_class_from_id (var_get_name (v)) != DC_SCRATCH) - { - set_var_case_index (v, d->next_value_idx); - d->next_value_idx += var_get_value_cnt (v); - i++; - } - else - dict_delete_var (d, v); + set_var_case_index (v, d->next_value_idx); + d->next_value_idx += var_get_value_cnt (v); } } - /* Reassigns case indices for D, increasing each index above START by the value PADDING. @@ -874,88 +866,34 @@ dict_pad_values (struct dictionary *d, int start, int padding) } -/* Returns the number of values that would be used by a case if - dict_compact_values() were called. */ +/* Returns the number of values occupied by the variables in + dictionary D. All variables are considered if EXCLUDE_CLASSES + is 0, or it may contain one or more of (1u << DC_ORDINARY), + (1u << DC_SYSTEM), or (1u << DC_SCRATCH) to exclude the + corresponding type of variable. + + The return value may be less than the number of values in one + of dictionary D's cases (as returned by + dict_get_next_value_idx) even if EXCLUDE_CLASSES is 0, because there may be + gaps in D's cases due to deleted variables. 
*/ size_t -dict_get_compacted_value_cnt (const struct dictionary *d) +dict_count_values (const struct dictionary *d, unsigned int exclude_classes) { size_t i; size_t cnt; - cnt = 0; - for (i = 0; i < d->var_cnt; i++) - if (dict_class_from_id (var_get_name (d->var[i])) != DC_SCRATCH) - cnt += var_get_value_cnt (d->var[i]); - return cnt; -} - -/* Creates and returns an array mapping from a dictionary index - to the case index that the corresponding variable will have - after calling dict_compact_values(). Scratch variables - receive -1 for case index because dict_compact_values() will - delete them. */ -int * -dict_get_compacted_dict_index_to_case_index (const struct dictionary *d) -{ - size_t i; - size_t next_value_idx; - int *map; + assert ((exclude_classes & ~((1u << DC_ORDINARY) + | (1u << DC_SYSTEM) + | (1u << DC_SCRATCH))) == 0); - map = xnmalloc (d->var_cnt, sizeof *map); - next_value_idx = 0; + cnt = 0; for (i = 0; i < d->var_cnt; i++) { - struct variable *v = d->var[i]; - - if (dict_class_from_id (var_get_name (v)) != DC_SCRATCH) - { - map[i] = next_value_idx; - next_value_idx += var_get_value_cnt (v); - } - else - map[i] = -1; - } - return map; -} - -/* Returns true if a case for dictionary D would be smaller after - compacting, false otherwise. Compacting a case eliminates - "holes" between values and after the last value. Holes are - created by deleting variables (or by scratch variables). - - The return value may differ from whether compacting a case - from dictionary D would *change* the case: compacting could - rearrange values even if it didn't reduce space - requirements. */ -bool -dict_compacting_would_shrink (const struct dictionary *d) -{ - return dict_get_compacted_value_cnt (d) < dict_get_next_value_idx (d); -} - -/* Returns true if a case for dictionary D would change after - compacting, false otherwise. Compacting a case eliminates - "holes" between values and after the last value. 
Holes are - created by deleting variables (or by scratch variables). - - The return value may differ from whether compacting a case - from dictionary D would *shrink* the case: compacting could - rearrange values without reducing space requirements. */ -bool -dict_compacting_would_change (const struct dictionary *d) -{ - size_t case_idx; - size_t i; - - case_idx = 0; - for (i = 0; i < dict_get_var_cnt (d); i++) - { - struct variable *v = dict_get_var (d, i); - if (var_get_case_index (v) != case_idx) - return true; - case_idx += var_get_value_cnt (v); + enum dict_class class = dict_class_from_id (var_get_name (d->var[i])); + if (!(exclude_classes & (1u << class))) + cnt += var_get_value_cnt (d->var[i]); } - return false; + return cnt; } /* How to copy a contiguous range of values between cases. */ @@ -977,10 +915,14 @@ struct dict_compactor compact cases for dictionary D. Compacting a case eliminates "holes" between values and after - the last value. Holes are created by deleting variables (or - by scratch variables). */ + the last value. (Holes are created by deleting variables.) + + All variables are compacted if EXCLUDE_CLASSES is 0, or it may + contain one or more of (1u << DC_ORDINARY), (1u << DC_SYSTEM), + or (1u << DC_SCRATCH) to cause the corresponding type of + variable to be deleted during compaction. 
*/ struct dict_compactor * -dict_make_compactor (const struct dictionary *d) +dict_make_compactor (const struct dictionary *d, unsigned int exclude_classes) { struct dict_compactor *compactor; struct copy_map *map; @@ -988,6 +930,10 @@ dict_make_compactor (const struct dictionary *d) size_t value_idx; size_t i; + assert ((exclude_classes & ~((1u << DC_ORDINARY) + | (1u << DC_SYSTEM) + | (1u << DC_SCRATCH))) == 0); + compactor = xmalloc (sizeof *compactor); compactor->maps = NULL; compactor->map_cnt = 0; @@ -998,9 +944,10 @@ dict_make_compactor (const struct dictionary *d) for (i = 0; i < d->var_cnt; i++) { struct variable *v = d->var[i]; - - if (dict_class_from_id (var_get_name (v)) == DC_SCRATCH) + enum dict_class class = dict_class_from_id (var_get_name (v)); + if (exclude_classes & (1u << class)) continue; + if (map != NULL && map->src_idx + map->cnt == var_get_case_index (v)) map->cnt += var_get_value_cnt (v); else @@ -1023,8 +970,7 @@ dict_make_compactor (const struct dictionary *d) COMPACTOR. Compacting a case eliminates "holes" between values and after - the last value. Holes are created by deleting variables (or - by scratch variables). */ + the last value. (Holes are created by deleting variables.) 
*/ void dict_compactor_compact (const struct dict_compactor *compactor, struct ccase *dst, const struct ccase *src) diff --git a/src/data/dictionary.h b/src/data/dictionary.h index 6eb5c724..1cde778e 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -102,13 +102,12 @@ void dict_set_case_limit (struct dictionary *, size_t); int dict_get_next_value_idx (const struct dictionary *); size_t dict_get_case_size (const struct dictionary *); +size_t dict_count_values (const struct dictionary *, + unsigned int exclude_classes); void dict_compact_values (struct dictionary *); -size_t dict_get_compacted_value_cnt (const struct dictionary *); -int *dict_get_compacted_dict_index_to_case_index (const struct dictionary *); -bool dict_compacting_would_shrink (const struct dictionary *); -bool dict_compacting_would_change (const struct dictionary *); -struct dict_compactor *dict_make_compactor (const struct dictionary *); +struct dict_compactor *dict_make_compactor (const struct dictionary *, + unsigned int exclude_classes); void dict_compactor_compact (const struct dict_compactor *, struct ccase *, const struct ccase *); void dict_compactor_destroy (struct dict_compactor *); diff --git a/src/data/procedure.c b/src/data/procedure.c index 6690490f..60be6b47 100644 --- a/src/data/procedure.c +++ b/src/data/procedure.c @@ -170,11 +170,13 @@ proc_open (struct dataset *ds) /* Prepare sink. */ if (!ds->discard_output) { - ds->compactor = (dict_compacting_would_shrink (ds->permanent_dict) - ? dict_make_compactor (ds->permanent_dict) + struct dictionary *pd = ds->permanent_dict; + size_t compacted_value_cnt = dict_count_values (pd, 1u << DC_SCRATCH); + bool should_compact = compacted_value_cnt < dict_get_next_value_idx (pd); + ds->compactor = (should_compact + ? 
dict_make_compactor (pd, 1u << DC_SCRATCH) : NULL); - ds->sink = autopaging_writer_create (dict_get_compacted_value_cnt ( - ds->permanent_dict)); + ds->sink = autopaging_writer_create (compacted_value_cnt); } else { @@ -257,7 +259,7 @@ proc_casereader_read (struct casereader *reader UNUSED, void *ds_, struct ccase tmp; if (ds->compactor != NULL) { - case_create (&tmp, dict_get_compacted_value_cnt (ds->dict)); + case_create (&tmp, casewriter_get_value_cnt (ds->sink)); dict_compactor_compact (ds->compactor, &tmp, c); } else @@ -325,8 +327,10 @@ proc_commit (struct dataset *ds) if (ds->compactor != NULL) { dict_compactor_destroy (ds->compactor); - dict_compact_values (ds->dict); ds->compactor = NULL; + + dict_delete_scratch_vars (ds->dict); + dict_compact_values (ds->dict); } /* Old data sink becomes new data source. */ diff --git a/src/data/scratch-writer.c b/src/data/scratch-writer.c index e085ca1c..c71608d3 100644 --- a/src/data/scratch-writer.c +++ b/src/data/scratch-writer.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -68,9 +69,11 @@ scratch_writer_open (struct file_handle *fh, /* Copy the dictionary and compact if needed. 
*/ scratch_dict = dict_clone (dictionary); - if (dict_compacting_would_shrink (scratch_dict)) + dict_delete_scratch_vars (scratch_dict); + if (dict_count_values (scratch_dict, 0) + < dict_get_next_value_idx (scratch_dict)) { - compactor = dict_make_compactor (scratch_dict); + compactor = dict_make_compactor (scratch_dict, 0); dict_compact_values (scratch_dict); } else diff --git a/src/language/data-io/get.c b/src/language/data-io/get.c index 383513e6..a4f5f8fa 100644 --- a/src/language/data-io/get.c +++ b/src/language/data-io/get.c @@ -337,6 +337,7 @@ parse_write_command (struct lexer *lexer, struct dataset *ds, goto error; } + dict_delete_scratch_vars (dict); dict_compact_values (dict); if (fh_get_referent (handle) == FH_REF_FILE) @@ -983,6 +984,7 @@ cmd_match_files (struct lexer *lexer, struct dataset *ds) || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last)) goto error; + dict_delete_scratch_vars (mtf.dict); dict_compact_values (mtf.dict); mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict)); taint = taint_clone (casewriter_get_taint (mtf.output)); diff --git a/src/language/stats/ChangeLog b/src/language/stats/ChangeLog index db40ec5c..ac368fd0 100644 --- a/src/language/stats/ChangeLog +++ b/src/language/stats/ChangeLog @@ -1,3 +1,9 @@ +2007-08-12 Ben Pfaff + + * flip.c: Drop use of dict_get_compacted_dict_index_to_case_index + and just use the ordinary case indexes. There seemed to be no + reason for the former method. + 2007-08-03 Ben Pfaff * rank.q (rank_cmd): Instead of sorting by SPLIT FILE vars, group diff --git a/src/language/stats/flip.c b/src/language/stats/flip.c index 53d9873d..f327adec 100644 --- a/src/language/stats/flip.c +++ b/src/language/stats/flip.c @@ -62,7 +62,6 @@ struct flip_pgm { struct pool *pool; /* Pool containing FLIP data. */ const struct variable **var; /* Variables to transpose. */ - int *idx_to_fv; /* var[]->index to compacted sink case fv. */ size_t var_cnt; /* Number of elements in `var'. 
*/ int case_cnt; /* Pre-flip case count. */ @@ -101,8 +100,6 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) flip = pool_create_container (struct flip_pgm, pool); flip->var = NULL; - flip->idx_to_fv = dict_get_compacted_dict_index_to_case_index (dict); - pool_register (flip->pool, free, flip->idx_to_fv); flip->var_cnt = 0; flip->case_cnt = 0; flip->new_names = NULL; @@ -171,7 +168,6 @@ cmd_flip (struct lexer *lexer, struct dataset *ds) flip->case_cnt = 1; /* Read the active file into a flip_sink. */ - proc_make_temporary_transformations_permanent (ds); proc_discard_output (ds); input = proc_open (ds); @@ -318,11 +314,10 @@ write_flip_case (struct flip_pgm *flip, const struct ccase *c) if (flip->new_names != NULL) { struct varname *v = pool_alloc (flip->pool, sizeof *v); - int fv = flip->idx_to_fv[var_get_dict_index (flip->new_names)]; v->next = NULL; if (var_is_numeric (flip->new_names)) { - double f = case_num_idx (c, fv); + double f = case_num (c, flip->new_names); if (f == SYSMIS) strcpy (v->name, "VSYSMIS"); @@ -336,7 +331,7 @@ write_flip_case (struct flip_pgm *flip, const struct ccase *c) else { int width = MIN (var_get_width (flip->new_names), MAX_SHORT_STRING); - memcpy (v->name, case_str_idx (c, fv), width); + memcpy (v->name, case_str (c, flip->new_names), width); v->name[width] = 0; } @@ -350,15 +345,8 @@ write_flip_case (struct flip_pgm *flip, const struct ccase *c) /* Write to external file. */ for (i = 0; i < flip->var_cnt; i++) { - double out; - - if (var_is_numeric (flip->var[i])) - { - int fv = flip->idx_to_fv[var_get_dict_index (flip->var[i])]; - out = case_num_idx (c, fv); - } - else - out = SYSMIS; + const struct variable *v = flip->var[i]; + double out = var_is_numeric (v) ? 
case_num (c, v) : SYSMIS; fwrite (&out, sizeof out, 1, flip->file); } return true; diff --git a/src/ui/gui/ChangeLog b/src/ui/gui/ChangeLog index 7e8b703f..b5d9c6a8 100644 --- a/src/ui/gui/ChangeLog +++ b/src/ui/gui/ChangeLog @@ -1,3 +1,9 @@ +2007-08-12 Ben Pfaff + + * psppire-dict.c (psppire_dict_dump): Don't use + dict_get_compacted_dict_index_to_case_index, as that function has + been deleted. + 2007-08-13 John Darrington * psppire-case-file.c (psppire_case_file_append_case): diff --git a/src/ui/gui/psppire-dict.c b/src/ui/gui/psppire-dict.c index 31761136..c9df6052 100644 --- a/src/ui/gui/psppire-dict.c +++ b/src/ui/gui/psppire-dict.c @@ -781,17 +781,14 @@ psppire_dict_dump (const PsppireDict *dict) gint i; const struct dictionary *d = dict->dict; - int *map = dict_get_compacted_dict_index_to_case_index (d); - for (i = 0; i < dict_get_var_cnt (d); ++i) { const struct variable *v = psppire_dict_get_variable (dict, i); int di = var_get_dict_index (v); - g_print ("\"%s\" idx=%d, fv=%d(%d), size=%d\n", + g_print ("\"%s\" idx=%d, fv=%d, size=%d\n", var_get_name(v), di, var_get_case_index(v), - map[di], value_cnt_from_width(var_get_width(v))); }