From: Ben Pfaff Date: Tue, 21 Feb 2023 01:04:18 +0000 (-0800) Subject: it works!! X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Fdelete-variables;p=pspp it works!! --- diff --git a/doc/automake.mk b/doc/automake.mk index aea1a6d6c7..110e379d11 100644 --- a/doc/automake.mk +++ b/doc/automake.mk @@ -238,8 +238,8 @@ endif $(FIGURE_SPVS): $(pspp) .sps.spv: - $(AM_V_GEN)(cd "$(top_srcdir)/examples" \ - && "$(abs_top_builddir)/$(pspp)" ../doc/pspp-figures/$( $@.tmp + $(AM_V_GEN)of=`pwd`/$@.tmp; (cd "$(top_srcdir)/examples" \ + && "$(abs_top_builddir)/$(pspp)" ../doc/pspp-figures/$(subreader), - &casereader_stateless_translator_class, ct); - taint_propagate (casereader_get_taint (ct->subreader), + output_proto, casereader_get_n_cases (cst->subreader), + &casereader_stateless_translator_class, cst); + taint_propagate (casereader_get_taint (cst->subreader), casereader_get_taint (reader)); return reader; } @@ -160,24 +169,32 @@ casereader_translate_stateless ( /* Internal read function for stateless translating casereader. */ static struct ccase * casereader_stateless_translator_read (struct casereader *reader UNUSED, - void *ct_, casenumber idx) + void *cst_, casenumber idx) { - struct casereader_translator *ct = ct_; - struct ccase *tmp = casereader_peek (ct->subreader, idx); + struct casereader_stateless_translator *cst = cst_; + struct ccase *tmp = casereader_peek (cst->subreader, idx); if (tmp != NULL) - tmp = ct->class->translate (tmp, ct->aux); + tmp = cst->class->translate (tmp, cst->aux); return tmp; } /* Internal destroy function for translating casereader. */ static void casereader_stateless_translator_destroy (struct casereader *reader UNUSED, - void *ct_) + void *cst_) { - struct casereader_translator *ct = ct_; - casereader_destroy (ct->subreader); - ct->class->destroy (ct->aux); - free (ct); + struct casereader_stateless_translator *cst = cst_; + casereader_destroy (cst->subreader); + cst->class->destroy (cst->aux); + free (cst); +} + +static void +casereader_stateless_translator_advance (struct casereader *reader UNUSED, + void *cst_, casenumber n) +{ + struct casereader_stateless_translator *cst = cst_; + cst->case_offset += casereader_advance (cst->subreader, n); } /* Casereader class for stateless translating casereader. */ @@ -186,7 +203,7 @@ casereader_stateless_translator_class = { casereader_stateless_translator_read, casereader_stateless_translator_destroy, - NULL, + casereader_stateless_translator_advance, }; diff --git a/src/data/dataset.c b/src/data/dataset.c index ded7c7ab02..aa68ae9cb6 100644 --- a/src/data/dataset.c +++ b/src/data/dataset.c @@ -351,13 +351,12 @@ bool dataset_delete_vars (struct dataset *ds, struct variable **vars, size_t n) { dict_delete_vars (ds->dict, vars, n); - - if (ds->source) - { - struct case_map *map = case_map_to_compact_dict (ds->d, 0); - ds->source = case_map_create_input_translator (map, ds->source); - } + ds->source = case_map_create_input_translator ( + case_map_to_compact_dict (ds->dict, 0), ds->source); dict_compact_values (ds->dict); + caseinit_clear (ds->caseinit); + caseinit_mark_as_preinited (ds->caseinit, ds->dict); + return true; } /* Returns a number unique to DS. It can be used to distinguish one dataset @@ -816,6 +815,12 @@ proc_pop_transformations (struct dataset *ds, struct trns_chain *chain) *chain = ds->stack[--ds->n_stack]; } +bool +proc_has_transformations (const struct dataset *ds) +{ + return ds->permanent_trns_chain.n || ds->temporary_trns_chain.n; +} + static enum trns_result store_case_num (void *var_, struct ccase **cc, casenumber case_num) { diff --git a/src/data/dataset.h b/src/data/dataset.h index 87fe1c3831..97a77441cd 100644 --- a/src/data/dataset.h +++ b/src/data/dataset.h @@ -23,6 +23,7 @@ #include "data/transformations.h" struct casereader; +struct casereader_translator_class; struct dataset; struct dictionary; struct session; @@ -48,8 +49,6 @@ bool dataset_has_source (const struct dataset *ds); bool dataset_set_source (struct dataset *, struct casereader *); struct casereader *dataset_steal_source (struct dataset *); -bool dataset_delete_vars (struct dataset *, struct variable **, size_t n); - unsigned int dataset_seqno (const struct dataset *); struct dataset_callbacks @@ -86,12 +85,18 @@ void add_transformation (struct dataset *ds, const struct trns_class *, void *); bool proc_cancel_all_transformations (struct dataset *ds); void proc_push_transformations (struct dataset *); void proc_pop_transformations (struct dataset *, struct trns_chain *); +bool proc_has_transformations (const struct dataset *); void proc_start_temporary_transformations (struct dataset *ds); bool proc_in_temporary_transformations (const struct dataset *ds); bool proc_make_temporary_transformations_permanent (struct dataset *ds); bool proc_cancel_temporary_transformations (struct dataset *ds); struct variable *add_permanent_ordering_transformation (struct dataset *); + +bool dataset_transform_source (struct dataset *, + const struct casereader_translator_class *, + void *aux); +bool dataset_delete_vars (struct dataset *, struct variable **, size_t n); /* Procedures. */ diff --git a/src/language/commands/autorecode.c b/src/language/commands/autorecode.c index 0cd81ac920..2e575f7b45 100644 --- a/src/language/commands/autorecode.c +++ b/src/language/commands/autorecode.c @@ -67,7 +67,7 @@ struct arc_spec int src_idx; /* Case index of source variable. */ char *src_name; /* Name of source variable. */ struct fmt_spec format; /* Print format in source variable. */ - struct variable *dst; /* Target variable. */ + size_t dst_idx; /* Target variable. */ struct missing_values mv; /* Missing values of source variable. */ char *label; /* Variable label of source variable. */ struct rec_items *items; @@ -90,13 +90,15 @@ struct rec_items /* AUTORECODE data. */ struct autorecode_pgm { + struct caseproto *proto; + struct arc_spec *specs; size_t n_specs; bool blank_valid; }; -static const struct trns_class autorecode_trns_class; +static const struct casereader_translator_class autorecode_trns_class; static int compare_arc_items (const void *, const void *, const void *aux); static void arc_free (struct autorecode_pgm *); @@ -131,8 +133,10 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) bool print = false; /* Create procedure. */ - struct autorecode_pgm *arc = XZALLOC (struct autorecode_pgm); - arc->blank_valid = true; + struct autorecode_pgm *arc = xmalloc (sizeof *arc); + *arc = (struct autorecode_pgm) { + .blank_valid = true, + }; /* Parse variable lists. */ lex_match_id (lexer, "VARIABLES"); @@ -331,14 +335,15 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) struct arc_spec *spec = &arc->specs[i]; /* Create destination variable. */ - spec->dst = dict_create_var_assert (dict, dst_names[i], 0); - var_set_label (spec->dst, spec->label); + struct variable *dst = dict_create_var_assert (dict, dst_names[i], 0); + spec->dst_idx = var_get_case_index (dst); + var_set_label (dst, spec->label); /* Set print format. */ size_t n_items = hmap_count (&spec->items->ht); char *longest_value = xasprintf ("%zu", n_items); struct fmt_spec format = { .type = FMT_F, .w = strlen (longest_value) }; - var_set_both_formats (spec->dst, &format); + var_set_both_formats (dst, &format); free (longest_value); /* Create array of pointers to items. */ @@ -364,11 +369,11 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) : spec->label && spec->label[0] ? pivot_value_new_text_format (N_("Recoding %s into %s (%s)."), spec->src_name, - var_get_name (spec->dst), + var_get_name (dst), spec->label) : pivot_value_new_text_format (N_("Recoding %s into %s."), spec->src_name, - var_get_name (spec->dst))); + var_get_name (dst))); struct pivot_table *table = pivot_table_create__ (title, "Recoding"); pivot_dimension_create ( @@ -431,7 +436,7 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) else for (size_t k = 0; k < n_missing; k++) mv_add_num (&mv, lo + k); - var_set_missing_values (spec->dst, &mv); + var_set_missing_values (dst, &mv); mv_destroy (&mv); } @@ -442,14 +447,17 @@ cmd_autorecode (struct lexer *lexer, struct dataset *ds) if (value_label && value_label[0]) { union value to_val = { .f = items[j]->to }; - var_add_value_label (spec->dst, &to_val, value_label); + var_add_value_label (dst, &to_val, value_label); } } /* Free array. */ free (items); } - add_transformation (ds, &autorecode_trns_class, arc); + arc->proto = caseproto_ref (dict_get_proto (dict)); + dataset_set_source (ds, casereader_translate_stateless ( + dataset_steal_source (ds), arc->proto, + &autorecode_trns_class, arc)); for (size_t i = 0; i < n_dsts; i++) free (dst_names[i]); @@ -552,25 +560,24 @@ compare_arc_items (const void *a_, const void *b_, const void *direction_) return direction == ASCENDING ? cmp : -cmp; } -static enum trns_result -autorecode_trns_proc (void *arc_, struct ccase **c, - casenumber case_idx UNUSED) +static struct ccase * +autorecode_translate (struct ccase *c, void *arc_) { struct autorecode_pgm *arc = arc_; - *c = case_unshare (*c); + c = case_unshare_and_resize (c, arc->proto); for (size_t i = 0; i < arc->n_specs; i++) { const struct arc_spec *spec = &arc->specs[i]; - const union value *value = case_data_idx (*c, spec->src_idx); + const union value *value = case_data_idx (c, spec->src_idx); int width = value_trim_spaces (value, spec->width); size_t hash = value_hash (value, width, 0); const struct arc_item *item = find_arc_item (spec->items, value, width, hash); - *case_num_rw (*c, spec->dst) = item ? item->to : SYSMIS; + *case_num_rw_idx (c, spec->dst_idx) = item ? item->to : SYSMIS; } - return TRNS_CONTINUE; + return c; } static bool @@ -578,12 +585,12 @@ autorecode_trns_free (void *arc_) { struct autorecode_pgm *arc = arc_; + caseproto_unref (arc->proto); arc_free (arc); return true; } -static const struct trns_class autorecode_trns_class = { - .name = "AUTORECODE", - .execute = autorecode_trns_proc, +static const struct casereader_translator_class autorecode_trns_class = { + .translate = autorecode_translate, .destroy = autorecode_trns_free, }; diff --git a/src/language/commands/delete-variables.c b/src/language/commands/delete-variables.c index 03ccd115fe..b7ca748256 100644 --- a/src/language/commands/delete-variables.c +++ b/src/language/commands/delete-variables.c @@ -33,81 +33,38 @@ int cmd_delete_variables (struct lexer *lexer, struct dataset *ds) { + if (proc_has_transformations (ds)) + { + lex_ofs_error (lexer, 0, lex_ofs (lexer) - 1, + _("%s may not be used when there are pending " + "transformations (use %s to execute transformations)."), + "DELETE VARIABLES", "EXECUTE"); + return CMD_FAILURE; + } + if (proc_in_temporary_transformations (ds)) + { + lex_ofs_error (lexer, 0, lex_ofs (lexer) - 1, + _("%s may not be used after %s."), + "DELETE VARIABLES", "TEMPORARY"); + return CMD_FAILURE; + } + struct variable **vars; size_t n_vars; - bool ok; - - if (proc_make_temporary_transformations_permanent (ds)) - lex_ofs_error (lexer, 0, lex_ofs (lexer) - 1, - _("%s may not be used after %s. " - "Temporary transformations will be made permanent."), - "DELETE VARIABLES", "TEMPORARY"); - if (!parse_variables (lexer, dataset_dict (ds), &vars, &n_vars, PV_NONE)) - goto error; + return CMD_FAILURE; if (n_vars == dict_get_n_vars (dataset_dict (ds))) { lex_ofs_error (lexer, 0, lex_ofs (lexer) - 1, _("%s may not be used to delete all variables " "from the active dataset dictionary. " "Use %s instead."), "DELETE VARIABLES", "NEW FILE"); - goto error; + free (vars); + return CMD_FAILURE; } - ok = casereader_destroy (proc_open_filtering (ds, false)); - ok = proc_commit (ds) && ok; - if (!ok) - goto error; - - dict_delete_vars (dataset_dict (ds), vars, n_vars); - - /* XXX A bunch of bugs conspire to make executing transformations again here - necessary, even though it shouldn't be. - - Consider the following (which is included in delete-variables.at): - - DATA LIST NOTABLE /s1 TO s2 1-2(A). - BEGIN DATA - 12 - END DATA. - DELETE VARIABLES s1. - NUMERIC n1. - LIST. - - The DATA LIST gives us a caseproto with widths 1,1. DELETE VARIABLES - deletes the first variable so we now have -1,1. This already is - technically a problem because proc_casereader_read() calls - case_unshare_and_resize() from the former to the latter caseproto, and - these caseprotos are not conformable (which is a requirement for - case_resize()). It doesn't cause an assert by default because - case_resize() uses expensive_assert() to check for it though. However, in - practice we don't see a problem yet because case_resize() only does work - if the number of widths in the source and dest caseproto are different. - - Executing NUMERIC adds a third variable, though, so we have -1,1,0. This - makes caseproto_resize() notice that there are fewer strings in the new - caseproto. Therefore it destroys the second one (s2). It should destroy - the first one (s1), but if the caseprotos were really conformable then it - would have destroyed the right one. This mistake eventually causes a bad - memory reference. - - Executing transformations a second time after DELETE VARIABLES, like we do - below, works around the problem because we can never run into a situation - where we've got both new variables (triggering a resize) and deleted - variables (triggering the bad free). - - We should fix this in a better way. Doing it cleanly seems hard. This - seems to work for now. */ - ok = casereader_destroy (proc_open_filtering (ds, false)); - ok = proc_commit (ds) && ok; - if (!ok) - goto error; - + dataset_delete_vars (ds, vars, n_vars); free (vars); return CMD_SUCCESS; - - error: - free (vars); - return CMD_CASCADING_FAILURE; } diff --git a/src/language/commands/rank.c b/src/language/commands/rank.c index 622354db2f..7d04e080b4 100644 --- a/src/language/commands/rank.c +++ b/src/language/commands/rank.c @@ -820,6 +820,8 @@ cmd_rank (struct lexer *lexer, struct dataset *ds) /* RANK transformation. */ struct rank_trns { + struct caseproto *proto; + int order_case_idx; struct rank_trns_input_var *input_vars; @@ -833,7 +835,7 @@ struct rank_trns_input_var struct casereader *input; struct ccase *current; - struct variable **output_vars; + size_t *output_var_indexes; }; static void @@ -843,48 +845,52 @@ advance_ranking (struct rank_trns_input_var *iv) iv->current = casereader_read (iv->input); } -static enum trns_result -rank_trns_proc (void *trns_, struct ccase **c, casenumber case_idx UNUSED) +static struct ccase * +rank_translate (struct ccase *c, void *trns_) { struct rank_trns *trns = trns_; - double order = case_num_idx (*c, trns->order_case_idx); - struct rank_trns_input_var *iv; - - *c = case_unshare (*c); - for (iv = trns->input_vars; iv < &trns->input_vars[trns->n_input_vars]; iv++) - while (iv->current != NULL) - { - double iv_order = case_num_idx (iv->current, 0); - if (iv_order == order) - { - size_t i; - - for (i = 0; i < trns->n_funcs; i++) - *case_num_rw (*c, iv->output_vars[i]) - = case_num_idx (iv->current, i + 1); - advance_ranking (iv); + + c = case_unshare_and_resize (c, trns->proto); + double order = case_num_idx (c, trns->order_case_idx); + for (struct rank_trns_input_var *iv = trns->input_vars; + iv < &trns->input_vars[trns->n_input_vars]; iv++) + { + for (size_t i = 0; i < trns->n_funcs; i++) + *case_num_rw_idx (c, iv->output_var_indexes[i]) = SYSMIS; + + while (iv->current != NULL) + { + double iv_order = case_num_idx (iv->current, 0); + if (iv_order == order) + { + for (size_t i = 0; i < trns->n_funcs; i++) + *case_num_rw_idx (c, iv->output_var_indexes[i]) + = case_num_idx (iv->current, i + 1); + advance_ranking (iv); + break; + } + else if (iv_order > order) break; - } - else if (iv_order > order) - break; - else - advance_ranking (iv); + else + advance_ranking (iv); + } } - return TRNS_CONTINUE; + return c; } static bool rank_trns_free (void *trns_) { struct rank_trns *trns = trns_; - struct rank_trns_input_var *iv; - for (iv = trns->input_vars; iv < &trns->input_vars[trns->n_input_vars]; iv++) + caseproto_unref (trns->proto); + for (struct rank_trns_input_var *iv = trns->input_vars; + iv < &trns->input_vars[trns->n_input_vars]; iv++) { casereader_destroy (iv->input); case_unref (iv->current); - free (iv->output_vars); + free (iv->output_var_indexes); } free (trns->input_vars); free (trns); @@ -892,9 +898,8 @@ rank_trns_free (void *trns_) return true; } -static const struct trns_class rank_trns_class = { - .name = "RANK", - .execute = rank_trns_proc, +static const struct casereader_translator_class rank_trns_class = { + .translate = rank_translate, .destroy = rank_trns_free, }; @@ -1016,17 +1021,20 @@ rank_cmd (struct dataset *ds, const struct rank *cmd) /* Merge the original data set with the ranks (which we already sorted on $ORDER). */ struct rank_trns *trns = xmalloc (sizeof *trns); - trns->order_case_idx = var_get_case_index (order_var); - trns->input_vars = xnmalloc (cmd->n_vars, sizeof *trns->input_vars); - trns->n_input_vars = cmd->n_vars; - trns->n_funcs = cmd->n_rs; + *trns = (struct rank_trns) { + .order_case_idx = var_get_case_index (order_var), + .input_vars = xnmalloc (cmd->n_vars, sizeof *trns->input_vars), + .n_input_vars = cmd->n_vars, + .n_funcs = cmd->n_rs, + }; for (size_t i = 0; i < trns->n_input_vars; i++) { struct rank_trns_input_var *iv = &trns->input_vars[i]; iv->input = casewriter_make_reader (outputs[i]); iv->current = casereader_read (iv->input); - iv->output_vars = xnmalloc (trns->n_funcs, sizeof *iv->output_vars); + iv->output_var_indexes = xnmalloc (trns->n_funcs, + sizeof *iv->output_var_indexes); for (size_t j = 0; j < trns->n_funcs; j++) { struct rank_spec *rs = &cmd->rs[j]; @@ -1037,15 +1045,18 @@ rank_cmd (struct dataset *ds, const struct rank *cmd) var_set_label (var, rs->dest_labels[i]); var_set_measure (var, rank_measures[rs->rfunc]); - iv->output_vars[j] = var; + iv->output_var_indexes[j] = var_get_case_index (var); } } free (outputs); - add_transformation (ds, &rank_trns_class, trns); + trns->proto = caseproto_ref (dict_get_proto (d)), + dataset_set_source (ds, casereader_translate_stateless ( + dataset_steal_source (ds), trns->proto, + &rank_trns_class, trns)); /* Delete our sort key, which we don't need anymore. */ - dict_delete_var (d, order_var); + dataset_delete_vars (ds, &order_var, 1); return ok; } diff --git a/tests/language/commands/autorecode.at b/tests/language/commands/autorecode.at index f639b078a4..aa151a89ce 100644 --- a/tests/language/commands/autorecode.at +++ b/tests/language/commands/autorecode.at @@ -314,7 +314,6 @@ autorecode x y into a b delete variables x y. list. - ]) AT_CHECK([pspp -O format=csv strings.sps], [0], diff --git a/tests/language/commands/delete-variables.at b/tests/language/commands/delete-variables.at index b5cda6e9d0..2915e21391 100644 --- a/tests/language/commands/delete-variables.at +++ b/tests/language/commands/delete-variables.at @@ -69,20 +69,26 @@ DATA LIST LIST NOTABLE /x y z. BEGIN DATA. 1 2 3 END DATA. +DELETE VARIABLES x y z. TEMPORARY. DELETE VARIABLES x. -DELETE VARIABLES y z. +COMPUTE y=0. +DELETE VARIABLES x. ]) AT_DATA([insert.sps], [dnl INSERT FILE='delete-variables.sps' ERROR=IGNORE. ]) AT_CHECK([pspp --testing-mode -O format=csv insert.sps], [1], [dnl -"delete-variables.sps:6.1-6.16: error: DELETE VARIABLES: DELETE VARIABLES may not be used after TEMPORARY. Temporary transformations will be made permanent. - 6 | DELETE VARIABLES x. +"delete-variables.sps:5.1-5.22: error: DELETE VARIABLES: DELETE VARIABLES may not be used to delete all variables from the active dataset dictionary. Use NEW FILE instead. + 5 | DELETE VARIABLES x y z. + | ^~~~~~~~~~~~~~~~~~~~~~" + +"delete-variables.sps:7.1-7.16: error: DELETE VARIABLES: DELETE VARIABLES may not be used after TEMPORARY. + 7 | DELETE VARIABLES x. | ^~~~~~~~~~~~~~~~" -"delete-variables.sps:7.1-7.20: error: DELETE VARIABLES: DELETE VARIABLES may not be used to delete all variables from the active dataset dictionary. Use NEW FILE instead. - 7 | DELETE VARIABLES y z. - | ^~~~~~~~~~~~~~~~~~~~" +"delete-variables.sps:9.1-9.16: error: DELETE VARIABLES: DELETE VARIABLES may not be used when there are pending transformations (use EXECUTE to execute transformations). + 9 | DELETE VARIABLES x. + | ^~~~~~~~~~~~~~~~" ]) AT_CLEANUP \ No newline at end of file