X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdataset.c;fp=src%2Fdata%2Fdataset.c;h=b2e031fc6ace0408541347ba4c04596322a77824;hb=b1dd857c0580ce963b28ee3a9573ae592a0a7dde;hp=cbff74088b62d470c924d27e9e5f3c6f6a9dceff;hpb=708f2f1b12056e77b03749aa7b5931dc07819356;p=pspp diff --git a/src/data/dataset.c b/src/data/dataset.c index cbff74088b..b2e031fc6a 100644 --- a/src/data/dataset.c +++ b/src/data/dataset.c @@ -36,6 +36,8 @@ #include "data/transformations.h" #include "data/variable.h" #include "libpspp/deque.h" +#include "libpspp/hash-functions.h" +#include "libpspp/hmap.h" #include "libpspp/misc.h" #include "libpspp/str.h" #include "libpspp/taint.h" @@ -116,6 +118,8 @@ static void dataset_changed__ (struct dataset *); static void dataset_transformations_changed__ (struct dataset *, bool non_empty); +static void add_measurement_level_trns (struct dataset *, struct dictionary *); +static void cancel_measurement_level_trns (struct trns_chain *); static void add_case_limit_trns (struct dataset *ds); static void add_filter_trns (struct dataset *ds); @@ -177,6 +181,7 @@ dataset_clone (struct dataset *old, const char *name) assert (old->sink == NULL); assert (!old->temporary); assert (!old->temporary_trns_chain.n); + assert (!old->n_stack); new = xzalloc (sizeof *new); new->name = xstrdup (name); @@ -421,6 +426,7 @@ proc_open_filtering (struct dataset *ds, bool filter) { struct casereader *reader; + assert (ds->n_stack == 0); assert (ds->source != NULL); assert (ds->proc_state == PROC_COMMITTED); @@ -432,6 +438,8 @@ proc_open_filtering (struct dataset *ds, bool filter) add_case_limit_trns (ds); if (filter) add_filter_trns (ds); + if (!proc_in_temporary_transformations (ds)) + add_measurement_level_trns (ds, ds->dict); /* Make permanent_dict refer to the dictionary right before data reaches the sink. */ @@ -609,6 +617,7 @@ proc_commit (struct dataset *ds) /* Dictionary from before TEMPORARY becomes permanent. 
*/ proc_cancel_temporary_transformations (ds); + bool ok = proc_cancel_all_transformations (ds) && ds->ok; if (!ds->discard_output) { @@ -638,7 +647,7 @@ proc_commit (struct dataset *ds) dict_clear_vectors (ds->dict); ds->permanent_dict = NULL; - return proc_cancel_all_transformations (ds) && ds->ok; + return ok; } /* Casereader class for procedure execution. */ @@ -699,11 +708,13 @@ proc_in_temporary_transformations (const struct dataset *ds) void proc_start_temporary_transformations (struct dataset *ds) { + assert (!ds->n_stack); if (!proc_in_temporary_transformations (ds)) { add_case_limit_trns (ds); ds->permanent_dict = dict_clone (ds->dict); + add_measurement_level_trns (ds, ds->permanent_dict); ds->temporary = true; dataset_transformations_changed__ (ds, true); @@ -723,6 +734,7 @@ proc_make_temporary_transformations_permanent (struct dataset *ds) { if (proc_in_temporary_transformations (ds)) { + cancel_measurement_level_trns (&ds->permanent_trns_chain); trns_chain_splice (&ds->permanent_trns_chain, &ds->temporary_trns_chain); ds->temporary = false; @@ -744,12 +756,14 @@ proc_cancel_temporary_transformations (struct dataset *ds) { if (proc_in_temporary_transformations (ds)) { + trns_chain_clear (&ds->temporary_trns_chain); + + + /* XXX remove measurement level transformation from permanent_trns_chain */ dict_unref (ds->dict); ds->dict = ds->permanent_dict; ds->permanent_dict = NULL; - trns_chain_clear (&ds->temporary_trns_chain); - dataset_transformations_changed__ (ds, ds->permanent_trns_chain.n != 0); return true; } @@ -938,6 +952,200 @@ dataset_need_lag (struct dataset *ds, int n_before) ds->n_lag = MAX (ds->n_lag, n_before); } +/* Transformation for adding measurement level. 
*/
+
+/* One distinct value observed for a variable.  Instances are heap-allocated
+   and linked into the 'values' table of a struct measurement_level_var. */
+struct measurement_level_value
+  {
+    struct hmap_node hmap_node; /* Node in measurement_level_var's 'values'. */
+    double value;               /* The observed value. */
+  };
+
+/* A variable whose measurement level has not been decided yet, together with
+   the set of distinct values collected for it so far.
+
+   'values' points to a separately allocated hmap so that this struct can be
+   copied from one array slot to another (see
+   add_measurement_level_trns_proc()) without moving the hmap itself. */
+struct measurement_level_var
+  {
+    struct variable *var;
+    struct hmap *values;
+  };
+
+/* Frees every value recorded in MLV->values, then destroys and frees the hash
+   table itself.  Does not free MLV. */
+static void
+add_measurement_level_var_uninit (struct measurement_level_var *mlv)
+{
+  struct measurement_level_value *mlvalue, *next;
+  HMAP_FOR_EACH_SAFE (mlvalue, next, struct measurement_level_value, hmap_node,
+                      mlv->values)
+    {
+      hmap_delete (mlv->values, &mlvalue->hmap_node);
+      free (mlvalue);
+    }
+  hmap_destroy (mlv->values);
+  free (mlv->values);
+}
+
+/* Chooses a measurement level for MLV from the values collected so far:
+   MEASURE_NOMINAL if nothing was collected or if any collected value is less
+   than 10, otherwise MEASURE_SCALE. */
+static enum measure
+add_measurement_level_var_interpret (const struct measurement_level_var *mlv)
+{
+  size_t n = hmap_count (mlv->values);
+  if (!n)
+    {
+      /* All missing (or no data). */
+      return MEASURE_NOMINAL;
+    }
+
+  const struct measurement_level_value *mlvalue;
+  HMAP_FOR_EACH (mlvalue, struct measurement_level_value, hmap_node,
+                 mlv->values)
+    if (mlvalue->value < 10)
+      return MEASURE_NOMINAL;
+  return MEASURE_SCALE;
+}
+
+/* Auxiliary data for the "add measurement level" transformation: the array of
+   variables that still need a level.  'n_vars' shrinks as levels are
+   decided. */
+struct measurement_level_trns
+  {
+    struct measurement_level_var *vars;
+    size_t n_vars;
+  };
+
+/* Feeds VALUE into MLV.
+
+   Returns MEASURE_SCALE as soon as the level can be decided: VALUE is negative
+   or not an integer, or recording VALUE brings the number of distinct values
+   up to settings_get_scalemin ().  Returns MEASURE_UNKNOWN when more data is
+   needed, which is the case for missing values, for values that were already
+   recorded, and for new values that leave the count below the threshold. */
+static enum measure
+add_measurement_level_trns_proc__ (struct measurement_level_var *mlv, double value)
+{
+  if (var_is_num_missing (mlv->var, value))
+    return MEASURE_UNKNOWN;
+  else if (value < 0 || value != floor (value))
+    return MEASURE_SCALE;
+
+  size_t hash = hash_double (value, 0);
+  struct measurement_level_value *mlvalue;
+  HMAP_FOR_EACH_WITH_HASH (mlvalue, struct measurement_level_value, hmap_node,
+                           hash, mlv->values)
+    if (mlvalue->value == value)
+      return MEASURE_UNKNOWN;
+
+  mlvalue = xmalloc (sizeof *mlvalue);
+  mlvalue->value = value;
+  hmap_insert (mlv->values, &mlvalue->hmap_node, hash);
+  if (hmap_count (mlv->values) >= settings_get_scalemin ())
+    return MEASURE_SCALE;
+
+  return MEASURE_UNKNOWN;
+}
+
+/* Transformation 'execute' callback.  Updates each pending variable in MLT_
+   with its value in *C.  A variable whose level gets decided is assigned that
+   level, has its value table freed, and is dropped from the pending array by
+   copying the last element into its slot.  Always returns TRNS_CONTINUE. */
+static enum trns_result
+add_measurement_level_trns_proc (void *mlt_, struct ccase **c,
+                                 casenumber case_nr UNUSED)
+{
+  struct measurement_level_trns *mlt = mlt_;
+  /* I advances only when slot I is kept: after a removal the slot holds a
+     different variable that still has to be examined. */
+  for (size_t i = 0; i < mlt->n_vars; )
+    {
+      struct measurement_level_var *mlv = &mlt->vars[i];
+      double value = case_num (*c, mlv->var);
+      enum measure m = add_measurement_level_trns_proc__ (mlv, value);
+      if (m != MEASURE_UNKNOWN)
+        {
+          var_set_measure (mlv->var, m);
+
+          add_measurement_level_var_uninit (mlv);
+          *mlv = mlt->vars[--mlt->n_vars];
+        }
+      else
+        i++;
+    }
+  return TRNS_CONTINUE;
+}
+
+/* Assigns each variable still pending in MLT the level chosen by
+   add_measurement_level_var_interpret(), then frees MLT and everything it
+   owns. */
+static void
+add_measurement_level_trns_free__ (struct measurement_level_trns *mlt)
+{
+  for (size_t i = 0; i < mlt->n_vars; i++)
+    {
+      struct measurement_level_var *mlv = &mlt->vars[i];
+      var_set_measure (mlv->var, add_measurement_level_var_interpret (mlv));
+      add_measurement_level_var_uninit (mlv);
+    }
+  free (mlt->vars);
+  free (mlt);
+}
+
+/* Transformation 'destroy' callback.  Always returns true.
+
+   add_measurement_level_trns_free__() assigns the final level to every
+   pending variable before releasing it, so no separate pass over the
+   variables is made here: that would only scan each value table and set each
+   level a second time. */
+static bool
+add_measurement_level_trns_free (void *mlt_)
+{
+  struct measurement_level_trns *mlt = mlt_;
+  add_measurement_level_trns_free__ (mlt);
+  return true;
+}
+
+static const struct trns_class add_measurement_level_trns_class = {
+  .name = "add measurement level",
+  .execute = add_measurement_level_trns_proc,
+  .destroy = add_measurement_level_trns_free,
+};
+
+/* Deals with every variable in DICT whose measurement level is
+   MEASURE_UNKNOWN.  If var_default_measure_for_format() yields a level for the
+   variable's print format, that level is assigned immediately.  The remaining
+   variables, if any, are handed to a new "add measurement level"
+   transformation appended to DS; if there are none, no transformation is
+   added. */
+static void
+add_measurement_level_trns (struct dataset *ds, struct dictionary *dict)
+{
+  struct variable **vars = NULL;
+  size_t n_vars = 0;
+  size_t allocated_vars = 0;
+
+  for (size_t i = 0; i < dict_get_n_vars (dict); i++)
+    {
+      struct variable *var = dict_get_var (dict, i);
+      if (var_get_measure (var) != MEASURE_UNKNOWN)
+        continue;
+
+      const struct fmt_spec *f = var_get_print_format (var);
+      enum measure m = var_default_measure_for_format (f->type);
+      if (m != MEASURE_UNKNOWN)
+        {
+          var_set_measure (var, m);
+          continue;
+        }
+
+      if (n_vars >= allocated_vars)
+        vars = x2nrealloc (vars, &allocated_vars, sizeof *vars);
+      vars[n_vars++] = var;
+    }
+
+  if (!n_vars)
+    return;
+
+  /* We do this as a second step because otherwise we'd be moving hmaps around,
+     which doesn't work. */
+  struct measurement_level_var *mlvs = xmalloc (n_vars * sizeof *mlvs);
+  for (size_t i = 0; i < n_vars; i++)
+    {
+      mlvs[i].var = vars[i];
+      mlvs[i].values = xmalloc (sizeof *mlvs[i].values);
+      hmap_init (mlvs[i].values);
+    }
+  free (vars);
+
+  struct measurement_level_trns *mlt = xmalloc (sizeof *mlt);
+  *mlt = (struct measurement_level_trns) {
+    .vars = mlvs,
+    .n_vars = n_vars,
+  };
+  add_transformation (ds, &add_measurement_level_trns_class, mlt);
+}
+
+/* If the last transformation in CHAIN is an "add measurement level"
+   transformation, disposes of it with add_measurement_level_trns_free__()
+   (which also assigns levels to its pending variables) and removes it from
+   CHAIN.  Otherwise does nothing. */
+static void
+cancel_measurement_level_trns (struct trns_chain *chain)
+{
+  if (!chain->n)
+    return;
+
+  struct transformation *trns = &chain->xforms[chain->n - 1];
+  if (trns->class != &add_measurement_level_trns_class)
+    return;
+
+  struct measurement_level_trns *mlt = trns->aux;
+  add_measurement_level_trns_free__ (mlt);
+  chain->n--;
+}
+
 static void
 dataset_changed__ (struct dataset *ds)
 {