From 12cc305385f5876ab813d340cca6c8a5c106b005 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Fri, 29 Jul 2022 15:55:30 -0700
Subject: [PATCH] more work

---
Review notes (text in this position is ignored when the patch is applied):

* measure_guesser_run() now leaves its read loop when casereader_read()
  hands back a null case.  Before, the loop condition only tested
  mg->n_vars, so a reader that ran dry while some variable was still
  undecided passed a null case to measure_guesser_add_case(), which
  calls case_num() on it.  The variables left over are settled by the
  measure_guesser_commit() call that follows the loop.

* Hunk 3 and the diffstat were amended by hand for that fix; the blob ids
  on the "index" line were not recomputed.

* Indentation inside the hunks is laid out in GNU style.  If context
  fails to match, try "git apply --ignore-whitespace".

* NOTE(review): measure_guesser_destroy() calls var_set_measure() for
  every variable still tracked.  As a result measure_guesser_is_needed()
  and cancel_measurement_level_trns() assign measurement levels as a side
  effect, and measure_guesser_run()/mg_trns_free() assign them twice
  (commit, then destroy).  Confirm that this is intended.

 src/data/dataset.c | 285 +++++++++++++++++++++++----------------------
 1 file changed, 145 insertions(+), 140 deletions(-)

diff --git a/src/data/dataset.c b/src/data/dataset.c
index 931d3dcbbf..b5851a37bf 100644
--- a/src/data/dataset.c
+++ b/src/data/dataset.c
@@ -950,156 +950,89 @@ dataset_need_lag (struct dataset *ds, int n_before)
   ds->n_lag = MAX (ds->n_lag, n_before);
 }
 
-/* Transformation for adding measurement level. */
+/* Measurement guesser, for guessing a measurement level from formats and
+   data. */
 
-struct measurement_level_value
+struct mg_value
   {
     struct hmap_node hmap_node;
     double value;
   };
 
-struct measurement_level_var
+struct mg_var
   {
     struct variable *var;
     struct hmap *values;
   };
 
 static void
-add_measurement_level_var_uninit (struct measurement_level_var *mlv)
+mg_var_uninit (struct mg_var *mgv)
 {
-  struct measurement_level_value *mlvalue, *next;
-  HMAP_FOR_EACH_SAFE (mlvalue, next, struct measurement_level_value, hmap_node,
-                      mlv->values)
+  struct mg_value *mgvalue, *next;
+  HMAP_FOR_EACH_SAFE (mgvalue, next, struct mg_value, hmap_node,
+                      mgv->values)
     {
-      hmap_delete (mlv->values, &mlvalue->hmap_node);
-      free (mlvalue);
+      hmap_delete (mgv->values, &mgvalue->hmap_node);
+      free (mgvalue);
     }
-  hmap_destroy (mlv->values);
-  free (mlv->values);
+  hmap_destroy (mgv->values);
+  free (mgv->values);
 }
 
 static enum measure
-add_measurement_level_var_interpret (const struct measurement_level_var *mlv)
+mg_var_interpret (const struct mg_var *mgv)
 {
-  size_t n = hmap_count (mlv->values);
+  size_t n = hmap_count (mgv->values);
   if (!n)
     {
       /* All missing (or no data). */
       return MEASURE_NOMINAL;
     }
 
-  const struct measurement_level_value *mlvalue;
-  HMAP_FOR_EACH (mlvalue, struct measurement_level_value, hmap_node,
-                 mlv->values)
-    if (mlvalue->value < 10)
+  const struct mg_value *mgvalue;
+  HMAP_FOR_EACH (mgvalue, struct mg_value, hmap_node,
+                 mgv->values)
+    if (mgvalue->value < 10)
       return MEASURE_NOMINAL;
   return MEASURE_SCALE;
 }
 
-struct measurement_level_trns
-  {
-    struct measurement_level_var *vars;
-    size_t n_vars;
-  };
-
 static enum measure
-add_measurement_level_trns_proc__ (struct measurement_level_var *mlv, double value)
+mg_var_add_value (struct mg_var *mgv, double value)
 {
-  if (var_is_num_missing (mlv->var, value))
+  if (var_is_num_missing (mgv->var, value))
     return MEASURE_UNKNOWN;
   else if (value < 0 || value != floor (value))
     return MEASURE_SCALE;
 
   size_t hash = hash_double (value, 0);
-  struct measurement_level_value *mlvalue;
-  HMAP_FOR_EACH_WITH_HASH (mlvalue, struct measurement_level_value, hmap_node,
-                           hash, mlv->values)
-    if (mlvalue->value == value)
+  struct mg_value *mgvalue;
+  HMAP_FOR_EACH_WITH_HASH (mgvalue, struct mg_value, hmap_node,
+                           hash, mgv->values)
+    if (mgvalue->value == value)
       return MEASURE_UNKNOWN;
 
-  mlvalue = xmalloc (sizeof *mlvalue);
-  mlvalue->value = value;
-  hmap_insert (mlv->values, &mlvalue->hmap_node, hash);
-  if (hmap_count (mlv->values) >= settings_get_scalemin ())
+  mgvalue = xmalloc (sizeof *mgvalue);
+  mgvalue->value = value;
+  hmap_insert (mgv->values, &mgvalue->hmap_node, hash);
+  if (hmap_count (mgv->values) >= settings_get_scalemin ())
     return MEASURE_SCALE;
 
   return MEASURE_UNKNOWN;
 }
 
-static void
-measurement_level_trns_run (struct measurement_level_trns *mlt,
-                            const struct ccase *c)
-{
-  for (size_t i = 0; i < mlt->n_vars; )
-    {
-      struct measurement_level_var *mlv = &mlt->vars[i];
-      double value = case_num (c, mlv->var);
-      enum measure m = add_measurement_level_trns_proc__ (mlv, value);
-      if (m != MEASURE_UNKNOWN)
-        {
-          var_set_measure (mlv->var, m);
-
-          add_measurement_level_var_uninit (mlv);
-          *mlv = mlt->vars[--mlt->n_vars];
-        }
-      else
-        i++;
-    }
-}
-
-static enum trns_result
-add_measurement_level_trns_proc (void *mlt_, struct ccase **c,
-                                 casenumber case_nr UNUSED)
-{
-  struct measurement_level_trns *mlt = mlt_;
-  measurement_level_trns_run (mlt, *c);
-  return TRNS_CONTINUE;
-}
-
-static void
-add_measurement_level_trns_free__ (struct measurement_level_trns *mlt)
-{
-  for (size_t i = 0; i < mlt->n_vars; i++)
-    {
-      struct measurement_level_var *mlv = &mlt->vars[i];
-      var_set_measure (mlv->var, add_measurement_level_var_interpret (mlv));
-      add_measurement_level_var_uninit (mlv);
-    }
-  free (mlt->vars);
-  free (mlt);
-}
-
-static void
-measurement_level_trns_commit (struct measurement_level_trns *mlt)
-{
-  for (size_t i = 0; i < mlt->n_vars; i++)
-    {
-      struct measurement_level_var *mlv = &mlt->vars[i];
-      var_set_measure (mlv->var, add_measurement_level_var_interpret (mlv));
-    }
-}
-
-static bool
-add_measurement_level_trns_free (void *mlt_)
-{
-  struct measurement_level_trns *mlt = mlt_;
-  measurement_level_trns_commit (mlt);
-  add_measurement_level_trns_free__ (mlt);
-  return true;
-}
-
-static const struct trns_class add_measurement_level_trns_class = {
-  .name = "add measurement level",
-  .execute = add_measurement_level_trns_proc,
-  .destroy = add_measurement_level_trns_free,
-};
+struct measure_guesser
+  {
+    struct mg_var *vars;
+    size_t n_vars;
+  };
 
-static struct measurement_level_trns *
-create_measurement_level_trns (struct dictionary *dict)
+static struct measure_guesser *
+measure_guesser_create (struct dictionary *dict)
 {
-  struct measurement_level_var *mlvs = NULL;
-  size_t n_mlvs = 0;
-  size_t allocated_mlvs = 0;
+  struct mg_var *mgvs = NULL;
+  size_t n_mgvs = 0;
+  size_t allocated_mgvs = 0;
 
   for (size_t i = 0; i < dict_get_n_vars (dict); i++)
     {
@@ -1115,58 +1048,80 @@ create_measurement_level_trns (struct dictionary *dict)
           continue;
         }
 
-      if (n_mlvs >= allocated_mlvs)
-        mlvs = x2nrealloc (mlvs, &allocated_mlvs, sizeof *mlvs);
+      if (n_mgvs >= allocated_mgvs)
+        mgvs = x2nrealloc (mgvs, &allocated_mgvs, sizeof *mgvs);
 
-      struct measurement_level_var *mlv = &mlvs[n_mlvs++];
-      *mlv = (struct measurement_level_var) {
+      struct mg_var *mgv = &mgvs[n_mgvs++];
+      *mgv = (struct mg_var) {
         .var = var,
-        .values = xmalloc (sizeof *mlv->values),
+        .values = xmalloc (sizeof *mgv->values),
       };
-      hmap_init (mlv->values);
+      hmap_init (mgv->values);
     }
-  if (!n_mlvs)
+  if (!n_mgvs)
     return NULL;
 
-  struct measurement_level_trns *mlt = xmalloc (sizeof *mlt);
-  *mlt = (struct measurement_level_trns) {
-    .vars = mlvs,
-    .n_vars = n_mlvs,
+  struct measure_guesser *mg = xmalloc (sizeof *mg);
+  *mg = (struct measure_guesser) {
+    .vars = mgvs,
+    .n_vars = n_mgvs,
   };
-  return mlt;
+  return mg;
 }
 
 static void
-add_measurement_level_trns (struct dataset *ds, struct dictionary *dict)
+measure_guesser_add_case (struct measure_guesser *mg, const struct ccase *c)
 {
-  struct measurement_level_trns *mlt = create_measurement_level_trns (dict);
-  if (mlt)
-    add_transformation (ds, &add_measurement_level_trns_class, mlt);
+  for (size_t i = 0; i < mg->n_vars; )
+    {
+      struct mg_var *mgv = &mg->vars[i];
+      double value = case_num (c, mgv->var);
+      enum measure m = mg_var_add_value (mgv, value);
+      if (m != MEASURE_UNKNOWN)
+        {
+          var_set_measure (mgv->var, m);
+
+          mg_var_uninit (mgv);
+          *mgv = mg->vars[--mg->n_vars];
+        }
+      else
+        i++;
+    }
 }
 
 static void
-cancel_measurement_level_trns (struct trns_chain *chain)
+measure_guesser_destroy (struct measure_guesser *mg)
 {
-  if (!chain->n)
+  if (!mg)
     return;
 
-  struct transformation *trns = &chain->xforms[chain->n - 1];
-  if (trns->class != &add_measurement_level_trns_class)
-    return;
+  for (size_t i = 0; i < mg->n_vars; i++)
+    {
+      struct mg_var *mgv = &mg->vars[i];
+      var_set_measure (mgv->var, mg_var_interpret (mgv));
+      mg_var_uninit (mgv);
+    }
+  free (mg->vars);
+  free (mg);
+}
 
-  struct measurement_level_trns *mlt = trns->aux;
-  add_measurement_level_trns_free__ (mlt);
-  chain->n--;
+static void
+measure_guesser_commit (struct measure_guesser *mg)
+{
+  for (size_t i = 0; i < mg->n_vars; i++)
+    {
+      struct mg_var *mgv = &mg->vars[i];
+      var_set_measure (mgv->var, mg_var_interpret (mgv));
+    }
 }
 
 bool
 measure_guesser_is_needed (struct dataset *ds)
 {
-  struct measurement_level_trns *mlt
-    = create_measurement_level_trns (dataset_dict (ds));
-  if (mlt)
+  struct measure_guesser *mg = measure_guesser_create (dataset_dict (ds));
+  if (mg)
     {
-      add_measurement_level_trns_free__ (mlt);
+      measure_guesser_destroy (mg);
       return true;
     }
   return false;
@@ -1175,22 +1130,72 @@ measure_guesser_is_needed (struct dataset *ds)
 void
 measure_guesser_run (struct dataset *ds, const struct casereader *reader)
 {
-  struct measurement_level_trns *mlt
-    = create_measurement_level_trns (dataset_dict (ds));
-  if (!mlt)
+  struct measure_guesser *mg = measure_guesser_create (dataset_dict (ds));
+  if (!mg)
     return;
 
   struct casereader *r = casereader_clone (reader);
-  while (mlt->n_vars > 0)
+  while (mg->n_vars > 0)
     {
       struct ccase *c = casereader_read (r);
-      measurement_level_trns_run (mlt, c);
+      /* A null case means the reader has nothing more to give; stop here
+         and let the commit below settle the variables that are still
+         undecided. */
+      if (!c)
+        break;
+      measure_guesser_add_case (mg, c);
       case_unref (c);
     }
   casereader_destroy (r);
 
-  measurement_level_trns_commit (mlt);
-  add_measurement_level_trns_free__ (mlt);
+  measure_guesser_commit (mg);
+  measure_guesser_destroy (mg);
+}
+
+static enum trns_result
+mg_trns_proc (void *mg_, struct ccase **c, casenumber case_nr UNUSED)
+{
+  struct measure_guesser *mg = mg_;
+  measure_guesser_add_case (mg, *c);
+  return TRNS_CONTINUE;
+}
+
+static bool
+mg_trns_free (void *mg_)
+{
+  struct measure_guesser *mg = mg_;
+  measure_guesser_commit (mg);
+  measure_guesser_destroy (mg);
+  return true;
+}
+
+static const struct trns_class mg_trns_class = {
+  .name = "add measurement level",
+  .execute = mg_trns_proc,
+  .destroy = mg_trns_free,
+};
+
+static void
+add_measurement_level_trns (struct dataset *ds, struct dictionary *dict)
+{
+  struct measure_guesser *mg = measure_guesser_create (dict);
+  if (mg)
+    add_transformation (ds, &mg_trns_class, mg);
+}
+
+static void
+cancel_measurement_level_trns (struct trns_chain *chain)
+{
+  if (!chain->n)
+    return;
+
+  struct transformation *trns = &chain->xforms[chain->n - 1];
+  if (trns->class != &mg_trns_class)
+    return;
+
+  struct measure_guesser *mg = trns->aux;
+  measure_guesser_destroy (mg);
+  chain->n--;
 }
 
 static void
-- 
2.30.2