#include "data/transformations.h"
#include "data/variable.h"
#include "libpspp/deque.h"
+#include "libpspp/hash-functions.h"
+#include "libpspp/hmap.h"
#include "libpspp/misc.h"
#include "libpspp/str.h"
#include "libpspp/taint.h"
and are finally passed to the procedure. */
struct casereader *source;
struct caseinit *caseinit;
- struct trns_chain *permanent_trns_chain;
+ struct trns_chain permanent_trns_chain;
struct dictionary *permanent_dict;
+ struct variable *order_var;
struct casewriter *sink;
- struct trns_chain *temporary_trns_chain;
+ struct trns_chain temporary_trns_chain;
+ bool temporary;
struct dictionary *dict;
+ /* Stack of transformation chains for DO IF and LOOP and INPUT PROGRAM. */
+ struct trns_chain *stack;
+ size_t n_stack;
+ size_t allocated_stack;
+
/* If true, cases are discarded instead of being written to
sink. */
bool discard_output;
- /* The transformation chain that the next transformation will be
- added to. */
- struct trns_chain *cur_trns_chain;
-
- /* The case map used to compact a case, if necessary;
- otherwise a null pointer. */
- struct case_map *compactor;
-
/* Time at which proc was last invoked. */
time_t last_proc_invocation;
static void dataset_transformations_changed__ (struct dataset *,
bool non_empty);
+static void add_measurement_level_trns (struct dataset *, struct dictionary *);
+static void cancel_measurement_level_trns (struct trns_chain *);
static void add_case_limit_trns (struct dataset *ds);
static void add_filter_trns (struct dataset *ds);
struct dataset *
dataset_create (struct session *session, const char *name)
{
- struct dataset *ds;
-
- ds = xzalloc (sizeof *ds);
- ds->name = xstrdup (name);
- ds->display = DATASET_FRONT;
- ds->dict = dict_create (get_default_encoding ());
-
- ds->caseinit = caseinit_create ();
-
+ struct dataset *ds = XMALLOC (struct dataset);
+ *ds = (struct dataset) {
+ .name = xstrdup (name),
+ .display = DATASET_FRONT,
+ .dict = dict_create (get_default_encoding ()),
+ .caseinit = caseinit_create (),
+ };
dataset_create_finish__ (ds, session);
return ds;
struct dataset *new;
assert (old->proc_state == PROC_COMMITTED);
- assert (trns_chain_is_empty (old->permanent_trns_chain));
+ assert (!old->permanent_trns_chain.n);
assert (old->permanent_dict == NULL);
assert (old->sink == NULL);
- assert (old->temporary_trns_chain == NULL);
+ assert (!old->temporary);
+ assert (!old->temporary_trns_chain.n);
+ assert (!old->n_stack);
new = xzalloc (sizeof *new);
new->name = xstrdup (name);
{
dataset_set_session (ds, NULL);
dataset_clear (ds);
- dict_destroy (ds->dict);
+ dict_unref (ds->dict);
+ dict_unref (ds->permanent_dict);
caseinit_destroy (ds->caseinit);
- trns_chain_destroy (ds->permanent_trns_chain);
+ trns_chain_uninit (&ds->permanent_trns_chain);
+ for (size_t i = 0; i < ds->n_stack; i++)
+ trns_chain_uninit (&ds->stack[i]);
+ free (ds->stack);
dataset_transformations_changed__ (ds, false);
free (ds->name);
free (ds);
dataset_clear (ds);
- dict_destroy (ds->dict);
+ dict_unref (ds->dict);
ds->dict = dict;
dict_set_change_callback (ds->dict, dict_callback, ds);
}
return reader;
}
+/* Deletes the N variables in VARS from DS's dictionary and wraps DS's source
+   casereader in an input translator built from a case map that is staged
+   around the deletion.
+
+   Asserts that DS is not in temporary transformations, that it has no
+   transformations, and that N is less than the number of variables in DS's
+   dictionary. */
+void
+dataset_delete_vars (struct dataset *ds, struct variable **vars, size_t n)
+{
+ assert (!proc_in_temporary_transformations (ds));
+ assert (!proc_has_transformations (ds));
+ assert (n < dict_get_n_vars (ds->dict));
+
+ /* Wrap the source in the caseinit translator for the current dictionary,
+    then reset the caseinit state and mark the dictionary as preinited.
+    NOTE(review): presumably this ensures cases reaching the case map below
+    already carry initialized values -- confirm against the caseinit API. */
+ caseinit_mark_for_init (ds->caseinit, ds->dict);
+ ds->source = caseinit_translate_casereader_to_init_vars (
+ ds->caseinit, dict_get_proto (ds->dict), ds->source);
+ caseinit_clear (ds->caseinit);
+ caseinit_mark_as_preinited (ds->caseinit, ds->dict);
+
+ /* The stage is created before the dictionary is modified and converted to
+    a case map afterward, so it brackets the deletion. */
+ struct case_map_stage *stage = case_map_stage_create (ds->dict);
+ dict_delete_vars (ds->dict, vars, n);
+ ds->source = case_map_create_input_translator (
+ case_map_stage_to_case_map (stage), ds->source);
+ /* The dictionary changed, so reset the caseinit state once more. */
+ caseinit_clear (ds->caseinit);
+ caseinit_mark_as_preinited (ds->caseinit, ds->dict);
+}
+
+/* Reorders variables in DS's dictionary by passing the N variables in VARS to
+   dict_reorder_vars(), and wraps DS's source casereader in an input
+   translator built from a case map that is staged around the reordering.
+
+   Asserts that DS is not in temporary transformations, that it has no
+   transformations, and that N does not exceed the number of variables in
+   DS's dictionary. */
+void
+dataset_reorder_vars (struct dataset *ds, struct variable **vars, size_t n)
+{
+ assert (!proc_in_temporary_transformations (ds));
+ assert (!proc_has_transformations (ds));
+ assert (n <= dict_get_n_vars (ds->dict));
+
+ /* Wrap the source in the caseinit translator for the current dictionary,
+    then reset the caseinit state and mark the dictionary as preinited.
+    NOTE(review): presumably this ensures cases reaching the case map below
+    already carry initialized values -- confirm against the caseinit API. */
+ caseinit_mark_for_init (ds->caseinit, ds->dict);
+ ds->source = caseinit_translate_casereader_to_init_vars (
+ ds->caseinit, dict_get_proto (ds->dict), ds->source);
+ caseinit_clear (ds->caseinit);
+ caseinit_mark_as_preinited (ds->caseinit, ds->dict);
+
+ /* The stage is created before the dictionary is modified and converted to
+    a case map afterward, so it brackets the reordering. */
+ struct case_map_stage *stage = case_map_stage_create (ds->dict);
+ dict_reorder_vars (ds->dict, vars, n);
+ ds->source = case_map_create_input_translator (
+ case_map_stage_to_case_map (stage), ds->source);
+ /* The dictionary changed, so reset the caseinit state once more. */
+ caseinit_clear (ds->caseinit);
+ caseinit_mark_as_preinited (ds->caseinit, ds->dict);
+}
+
/* Returns a number unique to DS. It can be used to distinguish one dataset
from any other within a given program run, even datasets that do not exist
at the same time. */
time_t
time_of_last_procedure (struct dataset *ds)
{
+ if (!ds)
+ return time (NULL);
if (ds->last_proc_invocation == 0)
update_last_proc_invocation (ds);
return ds->last_proc_invocation;
{
bool ok;
- if ((ds->temporary_trns_chain == NULL
- || trns_chain_is_empty (ds->temporary_trns_chain))
- && trns_chain_is_empty (ds->permanent_trns_chain))
+ if ((!ds->temporary || !ds->temporary_trns_chain.n)
+ && !ds->permanent_trns_chain.n)
{
ds->n_lag = 0;
ds->discard_output = false;
{
struct casereader *reader;
+ assert (ds->n_stack == 0);
assert (ds->source != NULL);
assert (ds->proc_state == PROC_COMMITTED);
update_last_proc_invocation (ds);
caseinit_mark_for_init (ds->caseinit, ds->dict);
+ ds->source = caseinit_translate_casereader_to_init_vars (
+ ds->caseinit, dict_get_proto (ds->dict), ds->source);
/* Finish up the collection of transformations. */
add_case_limit_trns (ds);
if (filter)
add_filter_trns (ds);
- trns_chain_finalize (ds->cur_trns_chain);
+ if (!proc_in_temporary_transformations (ds))
+ add_measurement_level_trns (ds, ds->dict);
/* Make permanent_dict refer to the dictionary right before
data reaches the sink. */
/* Prepare sink. */
if (!ds->discard_output)
{
- struct dictionary *pd = ds->permanent_dict;
- size_t compacted_value_cnt = dict_count_values (pd, 1u << DC_SCRATCH);
- if (compacted_value_cnt < dict_get_next_value_idx (pd))
- {
- struct caseproto *compacted_proto;
- compacted_proto = dict_get_compacted_proto (pd, 1u << DC_SCRATCH);
- ds->compactor = case_map_to_compact_dict (pd, 1u << DC_SCRATCH);
- ds->sink = autopaging_writer_create (compacted_proto);
- caseproto_unref (compacted_proto);
- }
- else
- {
- ds->compactor = NULL;
- ds->sink = autopaging_writer_create (dict_get_proto (pd));
- }
+ struct dictionary *pd = dict_clone (ds->permanent_dict);
+ struct case_map_stage *stage = case_map_stage_create (pd);
+ dict_delete_scratch_vars (pd);
+ ds->sink = case_map_create_output_translator (
+ case_map_stage_to_case_map (stage),
+ autopaging_writer_create (dict_get_proto (pd)));
+ dict_unref (pd);
}
else
- {
- ds->compactor = NULL;
- ds->sink = NULL;
- }
+ ds->sink = NULL;
/* Allocate memory for lagged cases. */
ds->lag_cases = deque_init (&ds->lag, ds->n_lag, sizeof *ds->lag_cases);
assert (ds->proc_state == PROC_OPEN);
for (; ; case_unref (c))
{
- casenumber case_nr;
-
assert (retval == TRNS_DROP_CASE || retval == TRNS_ERROR);
if (retval == TRNS_ERROR)
ds->ok = false;
if (c == NULL)
return NULL;
c = case_unshare_and_resize (c, dict_get_proto (ds->dict));
- caseinit_init_vars (ds->caseinit, c);
+ caseinit_restore_left_vars (ds->caseinit, c);
/* Execute permanent transformations. */
- case_nr = ds->cases_written + 1;
- retval = trns_chain_execute (ds->permanent_trns_chain, TRNS_CONTINUE,
- &c, case_nr);
- caseinit_update_left_vars (ds->caseinit, c);
+ casenumber case_nr = ds->cases_written + 1;
+ retval = trns_chain_execute (&ds->permanent_trns_chain, case_nr, &c);
+ caseinit_save_left_vars (ds->caseinit, c);
if (retval != TRNS_CONTINUE)
continue;
/* Write case to replacement dataset. */
ds->cases_written++;
if (ds->sink != NULL)
- casewriter_write (ds->sink,
- case_map_execute (ds->compactor, case_ref (c)));
+ {
+ if (ds->order_var)
+ *case_num_rw (c, ds->order_var) = case_nr;
+ casewriter_write (ds->sink, case_ref (c));
+ }
/* Execute temporary transformations. */
- if (ds->temporary_trns_chain != NULL)
+ if (ds->temporary_trns_chain.n)
{
- retval = trns_chain_execute (ds->temporary_trns_chain, TRNS_CONTINUE,
- &c, ds->cases_written);
+ retval = trns_chain_execute (&ds->temporary_trns_chain,
+ ds->cases_written, &c);
if (retval != TRNS_CONTINUE)
continue;
}
/* Dictionary from before TEMPORARY becomes permanent. */
proc_cancel_temporary_transformations (ds);
+ bool ok = proc_cancel_all_transformations (ds) && ds->ok;
if (!ds->discard_output)
{
- /* Finish compacting. */
- if (ds->compactor != NULL)
- {
- case_map_destroy (ds->compactor);
- ds->compactor = NULL;
-
- dict_delete_scratch_vars (ds->dict);
- dict_compact_values (ds->dict);
- }
+ dict_delete_scratch_vars (ds->dict);
/* Old data sink becomes new data source. */
if (ds->sink != NULL)
dict_clear_vectors (ds->dict);
ds->permanent_dict = NULL;
- return proc_cancel_all_transformations (ds) && ds->ok;
+ ds->order_var = NULL;
+ return ok;
}
/* Casereader class for procedure execution. */
return NULL;
}
\f
-/* Returns the current set of permanent transformations,
- and clears the permanent transformations.
- For use by INPUT PROGRAM. */
-struct trns_chain *
-proc_capture_transformations (struct dataset *ds)
-{
- struct trns_chain *chain;
-
- assert (ds->temporary_trns_chain == NULL);
- chain = ds->permanent_trns_chain;
- ds->cur_trns_chain = ds->permanent_trns_chain = trns_chain_create ();
- dataset_transformations_changed__ (ds, false);
-
- return chain;
-}
-
-/* Adds a transformation that processes a case with PROC and
- frees itself with FREE to the current set of transformations.
- The functions are passed AUX as auxiliary data. */
-void
-add_transformation (struct dataset *ds, trns_proc_func *proc, trns_free_func *free, void *aux)
-{
- trns_chain_append (ds->cur_trns_chain, NULL, proc, free, aux);
- dataset_transformations_changed__ (ds, true);
-}
-
-/* Adds a transformation that processes a case with PROC and
- frees itself with FREE to the current set of transformations.
- When parsing of the block of transformations is complete,
- FINALIZE will be called.
- The functions are passed AUX as auxiliary data. */
+/* Adds TRNS to the current set of transformations. */
void
-add_transformation_with_finalizer (struct dataset *ds,
- trns_finalize_func *finalize,
- trns_proc_func *proc,
- trns_free_func *free, void *aux)
+add_transformation (struct dataset *ds,
+ const struct trns_class *class, void *aux)
{
- trns_chain_append (ds->cur_trns_chain, finalize, proc, free, aux);
+ struct trns_chain *chain = (ds->n_stack > 0 ? &ds->stack[ds->n_stack - 1]
+ : ds->temporary ? &ds->temporary_trns_chain
+ : &ds->permanent_trns_chain);
+ struct transformation t = { .class = class, .aux = aux };
+ trns_chain_append (chain, &t);
dataset_transformations_changed__ (ds, true);
}
-/* Returns the index of the next transformation.
- This value can be returned by a transformation procedure
- function to indicate a "jump" to that transformation. */
-size_t
-next_transformation (const struct dataset *ds)
-{
- return trns_chain_next (ds->cur_trns_chain);
-}
-
/* Returns true if the next call to add_transformation() will add
a temporary transformation, false if it will add a permanent
transformation. */
bool
proc_in_temporary_transformations (const struct dataset *ds)
{
- return ds->temporary_trns_chain != NULL;
+ return ds->temporary;
}
/* Marks the start of temporary transformations.
void
proc_start_temporary_transformations (struct dataset *ds)
{
+ assert (!ds->n_stack);
if (!proc_in_temporary_transformations (ds))
{
add_case_limit_trns (ds);
ds->permanent_dict = dict_clone (ds->dict);
+ add_measurement_level_trns (ds, ds->permanent_dict);
- trns_chain_finalize (ds->permanent_trns_chain);
- ds->temporary_trns_chain = ds->cur_trns_chain = trns_chain_create ();
+ ds->temporary = true;
dataset_transformations_changed__ (ds, true);
}
}
{
if (proc_in_temporary_transformations (ds))
{
- trns_chain_finalize (ds->temporary_trns_chain);
- trns_chain_splice (ds->permanent_trns_chain, ds->temporary_trns_chain);
- ds->temporary_trns_chain = NULL;
+ cancel_measurement_level_trns (&ds->permanent_trns_chain);
+ trns_chain_splice (&ds->permanent_trns_chain, &ds->temporary_trns_chain);
- ds->cur_trns_chain = ds->permanent_trns_chain;
+ ds->temporary = false;
- dict_destroy (ds->permanent_dict);
+ dict_unref (ds->permanent_dict);
ds->permanent_dict = NULL;
return true;
{
if (proc_in_temporary_transformations (ds))
{
- dict_destroy (ds->dict);
+ trns_chain_clear (&ds->temporary_trns_chain);
+
+ dict_unref (ds->dict);
ds->dict = ds->permanent_dict;
ds->permanent_dict = NULL;
- trns_chain_destroy (ds->temporary_trns_chain);
- ds->temporary_trns_chain = NULL;
- dataset_transformations_changed__ (
- ds, !trns_chain_is_empty (ds->permanent_trns_chain));
+ dataset_transformations_changed__ (ds, ds->permanent_trns_chain.n != 0);
return true;
}
else
{
bool ok;
assert (ds->proc_state == PROC_COMMITTED);
- ok = trns_chain_destroy (ds->permanent_trns_chain);
- ok = trns_chain_destroy (ds->temporary_trns_chain) && ok;
- ds->permanent_trns_chain = ds->cur_trns_chain = trns_chain_create ();
- ds->temporary_trns_chain = NULL;
+ ok = trns_chain_clear (&ds->permanent_trns_chain);
+ ok = trns_chain_clear (&ds->temporary_trns_chain) && ok;
+ ds->temporary = false;
+ for (size_t i = 0; i < ds->n_stack; i++)
+ ok = trns_chain_uninit (&ds->stack[i]) && ok;
+ ds->n_stack = 0;
dataset_transformations_changed__ (ds, false);
return ok;
}
-static int
+/* Pushes a newly initialized transformation chain onto DS's stack of chains,
+   enlarging the stack's storage first if it is full.  While the stack is
+   nonempty, add_transformation() appends to the chain on top of it. */
+void
+proc_push_transformations (struct dataset *ds)
+{
+  if (ds->allocated_stack <= ds->n_stack)
+    ds->stack = x2nrealloc (ds->stack, &ds->allocated_stack,
+                            sizeof *ds->stack);
+
+  struct trns_chain *top = &ds->stack[ds->n_stack];
+  ds->n_stack++;
+  trns_chain_init (top);
+}
+
+/* Removes the chain on top of DS's stack of transformation chains and copies
+   it into *CHAIN.  The stack must not be empty (asserted).  The popped chain
+   is not uninitialized here; it is handed to the caller as-is. */
+void
+proc_pop_transformations (struct dataset *ds, struct trns_chain *chain)
+{
+  assert (ds->n_stack > 0);
+
+  size_t top = ds->n_stack - 1;
+  ds->n_stack = top;
+  *chain = ds->stack[top];
+}
+
+/* Returns true if DS's permanent or temporary transformation chain contains
+   at least one transformation, false if both are empty. */
+bool
+proc_has_transformations (const struct dataset *ds)
+{
+  if (ds->permanent_trns_chain.n != 0)
+    return true;
+  return ds->temporary_trns_chain.n != 0;
+}
+
+static enum trns_result
store_case_num (void *var_, struct ccase **cc, casenumber case_num)
{
struct variable *var = var_;
*cc = case_unshare (*cc);
- case_data_rw (*cc, var)->f = case_num;
+ *case_num_rw (*cc, var) = case_num;
return TRNS_CONTINUE;
}
-/* Add a variable which we can sort by to get back the original order. */
+/* Adds a variable $ORDER, which we can sort by to get back the original
+   order. */
struct variable *
add_permanent_ordering_transformation (struct dataset *ds)
{
- struct variable *temp_var;
+ struct dictionary *d = ds->permanent_dict ? ds->permanent_dict : ds->dict;
+ struct variable *order_var = dict_create_var_assert (d, "$ORDER", 0);
+ ds->order_var = order_var;
- temp_var = dict_create_var_assert (ds->dict, "$ORDER", 0);
- if (proc_in_temporary_transformations (ds))
+ if (ds->permanent_dict)
{
- struct variable *perm_var;
-
- perm_var = dict_clone_var_in_place_assert (ds->permanent_dict, temp_var);
- trns_chain_append (ds->permanent_trns_chain, NULL, store_case_num,
- NULL, perm_var);
- trns_chain_finalize (ds->permanent_trns_chain);
+ order_var = dict_create_var_assert (ds->dict, "$ORDER", 0);
+ static const struct trns_class trns_class = {
+ .name = "ordering",
+ .execute = store_case_num
+ };
+ const struct transformation t = { .class = &trns_class, .aux = order_var };
+ trns_chain_prepend (&ds->temporary_trns_chain, &t);
}
- else
- add_transformation (ds, store_case_num, NULL, temp_var);
- return temp_var;
+ return order_var;
}
\f
/* Causes output from the next procedure to be discarded, instead
return true;
}
\f
-static trns_proc_func case_limit_trns_proc;
-static trns_free_func case_limit_trns_free;
-
-/* Adds a transformation that limits the number of cases that may
- pass through, if DS->DICT has a case limit. */
-static void
-add_case_limit_trns (struct dataset *ds)
-{
- casenumber case_limit = dict_get_case_limit (ds->dict);
- if (case_limit != 0)
- {
- casenumber *cases_remaining = xmalloc (sizeof *cases_remaining);
- *cases_remaining = case_limit;
- add_transformation (ds, case_limit_trns_proc, case_limit_trns_free,
- cases_remaining);
- dict_set_case_limit (ds->dict, 0);
- }
-}
-
/* Limits the maximum number of cases processed to
*CASES_REMAINING. */
-static int
+static enum trns_result
case_limit_trns_proc (void *cases_remaining_,
struct ccase **c UNUSED, casenumber case_nr UNUSED)
{
free (cases_remaining);
return true;
}
-\f
-static trns_proc_func filter_trns_proc;
-/* Adds a temporary transformation to filter data according to
- the variable specified on FILTER, if any. */
+/* Adds a transformation that limits the number of cases that may
+ pass through, if DS->DICT has a case limit. */
static void
-add_filter_trns (struct dataset *ds)
+add_case_limit_trns (struct dataset *ds)
{
- struct variable *filter_var = dict_get_filter (ds->dict);
- if (filter_var != NULL)
+ casenumber case_limit = dict_get_case_limit (ds->dict);
+ if (case_limit != 0)
{
- proc_start_temporary_transformations (ds);
- add_transformation (ds, filter_trns_proc, NULL, filter_var);
+ casenumber *cases_remaining = xmalloc (sizeof *cases_remaining);
+ *cases_remaining = case_limit;
+
+ static const struct trns_class trns_class = {
+ .name = "case limit",
+ .execute = case_limit_trns_proc,
+ .destroy = case_limit_trns_free,
+ };
+ add_transformation (ds, &trns_class, cases_remaining);
+
+ dict_set_case_limit (ds->dict, 0);
}
}
+\f
/* FILTER transformation. */
-static int
+static enum trns_result
filter_trns_proc (void *filter_var_,
- struct ccase **c UNUSED, casenumber case_nr UNUSED)
+ struct ccase **c, casenumber case_nr UNUSED)
{
struct variable *filter_var = filter_var_;
double f = case_num (*c, filter_var);
- return (f != 0.0 && !var_is_num_missing (filter_var, f, MV_ANY)
+ return (f != 0.0 && !var_is_num_missing (filter_var, f)
? TRNS_CONTINUE : TRNS_DROP_CASE);
}
+/* If DS's dictionary names a filter variable, starts temporary
+   transformations and appends a FILTER transformation that tests that
+   variable.  Does nothing if there is no filter variable. */
+static void
+add_filter_trns (struct dataset *ds)
+{
+  static const struct trns_class filter_trns_class = {
+    .name = "FILTER",
+    .execute = filter_trns_proc,
+  };
+
+  struct variable *filter_var = dict_get_filter (ds->dict);
+  if (filter_var == NULL)
+    return;
+
+  proc_start_temporary_transformations (ds);
+  add_transformation (ds, &filter_trns_class, filter_var);
+}
void
dataset_need_lag (struct dataset *ds, int n_before)
ds->n_lag = MAX (ds->n_lag, n_before);
}
\f
+/* Measurement guesser, for guessing a measurement level from formats and
+ data. */
+
+/* One distinct value recorded for a variable by mg_var_add_value().  Only
+   non-missing, nonnegative, integer values reach the point where a node is
+   inserted. */
+struct mg_value
+ {
+ struct hmap_node hmap_node; /* Node in struct mg_var's 'values' map. */
+ double value; /* The recorded value; also the hash key. */
+ };
+
+/* Guessing state for one variable whose measurement level is still
+   unknown. */
+struct mg_var
+ {
+ struct variable *var; /* Variable whose level is being guessed. */
+ struct hmap *values; /* Heap-allocated map of struct mg_value. */
+ };
+
+/* Frees everything owned by MGV: each recorded struct mg_value, the hash map
+   itself, and the heap block that holds the map.  Does not free MGV, and
+   leaves MGV->values dangling, so the caller must not use it afterward. */
+static void
+mg_var_uninit (struct mg_var *mgv)
+{
+ struct mg_value *mgvalue, *next;
+ /* The _SAFE iteration form is used because nodes are deleted and freed
+    during the walk. */
+ HMAP_FOR_EACH_SAFE (mgvalue, next, struct mg_value, hmap_node,
+ mgv->values)
+ {
+ hmap_delete (mgv->values, &mgvalue->hmap_node);
+ free (mgvalue);
+ }
+ hmap_destroy (mgv->values);
+ free (mgv->values); /* The map was allocated separately with xmalloc. */
+}
+
+/* Chooses a final measurement level for MGV from the values recorded so far.
+   Returns MEASURE_NOMINAL if no values were recorded or if any recorded value
+   is less than 10, otherwise MEASURE_SCALE.
+
+   NOTE(review): the threshold 10 is hard-coded here; its rationale is not
+   visible in this file. */
+static enum measure
+mg_var_interpret (const struct mg_var *mgv)
+{
+ size_t n = hmap_count (mgv->values);
+ if (!n)
+ {
+ /* All missing (or no data). */
+ return MEASURE_NOMINAL;
+ }
+
+ const struct mg_value *mgvalue;
+ /* A single recorded value below 10 is enough to decide on nominal. */
+ HMAP_FOR_EACH (mgvalue, struct mg_value, hmap_node,
+ mgv->values)
+ if (mgvalue->value < 10)
+ return MEASURE_NOMINAL;
+ return MEASURE_SCALE;
+}
+
+/* Feeds one observed VALUE for MGV's variable into the guesser.
+
+   Returns MEASURE_SCALE as soon as the level can be decided: when VALUE is
+   negative or not an integer, or when recording VALUE brings the number of
+   distinct values up to settings_get_scalemin ().  Returns MEASURE_UNKNOWN
+   when no decision can be made yet: VALUE is missing for the variable, was
+   already recorded, or was recorded without reaching the threshold. */
+static enum measure
+mg_var_add_value (struct mg_var *mgv, double value)
+{
+ if (var_is_num_missing (mgv->var, value))
+ return MEASURE_UNKNOWN;
+ else if (value < 0 || value != floor (value))
+ return MEASURE_SCALE;
+
+ /* Look for VALUE among those already recorded; duplicates add no
+    information. */
+ size_t hash = hash_double (value, 0);
+ struct mg_value *mgvalue;
+ HMAP_FOR_EACH_WITH_HASH (mgvalue, struct mg_value, hmap_node,
+ hash, mgv->values)
+ if (mgvalue->value == value)
+ return MEASURE_UNKNOWN;
+
+ /* Record the new distinct value; it is freed by mg_var_uninit(). */
+ mgvalue = xmalloc (sizeof *mgvalue);
+ mgvalue->value = value;
+ hmap_insert (mgv->values, &mgvalue->hmap_node, hash);
+ if (hmap_count (mgv->values) >= settings_get_scalemin ())
+ return MEASURE_SCALE;
+
+ return MEASURE_UNKNOWN;
+}
+
+/* Set of variables whose measurement levels are still being guessed from
+   data.  measure_guesser_add_case() removes entries from VARS as their levels
+   are decided, decrementing N_VARS. */
+struct measure_guesser
+ {
+ struct mg_var *vars; /* Heap-allocated array of undecided variables. */
+ size_t n_vars; /* Number of entries of 'vars' still in use. */
+ };
+
+/* Scans DICT for variables whose measurement level is MEASURE_UNKNOWN.  For
+   each one whose print format implies a default level, sets that level
+   immediately.  The rest are collected into a new measure_guesser, each with
+   an empty value map.
+
+   Returns the new guesser, or a null pointer if no variable needs its level
+   guessed from data. */
+static struct measure_guesser *
+measure_guesser_create__ (struct dictionary *dict)
+{
+ struct mg_var *mgvs = NULL;
+ size_t n_mgvs = 0;
+ size_t allocated_mgvs = 0;
+
+ for (size_t i = 0; i < dict_get_n_vars (dict); i++)
+ {
+ struct variable *var = dict_get_var (dict, i);
+ if (var_get_measure (var) != MEASURE_UNKNOWN)
+ continue;
+
+ /* Try the format-based default first; data is needed only if the
+    format does not settle the level. */
+ struct fmt_spec f = var_get_print_format (var);
+ enum measure m = var_default_measure_for_format (f.type);
+ if (m != MEASURE_UNKNOWN)
+ {
+ var_set_measure (var, m);
+ continue;
+ }
+
+ if (n_mgvs >= allocated_mgvs)
+ mgvs = x2nrealloc (mgvs, &allocated_mgvs, sizeof *mgvs);
+
+ /* The map is allocated separately from the array element;
+    mg_var_uninit() frees it. */
+ struct mg_var *mgv = &mgvs[n_mgvs++];
+ *mgv = (struct mg_var) {
+ .var = var,
+ .values = xmalloc (sizeof *mgv->values),
+ };
+ hmap_init (mgv->values);
+ }
+ /* Nothing was appended, so 'mgvs' is still null and there is nothing to
+    free. */
+ if (!n_mgvs)
+ return NULL;
+
+ struct measure_guesser *mg = xmalloc (sizeof *mg);
+ *mg = (struct measure_guesser) {
+ .vars = mgvs,
+ .n_vars = n_mgvs,
+ };
+ return mg;
+}
+
+/* Looks in DS's dictionary for variables whose measurement level is unknown.
+   Where the level can be derived from the variable's format alone, sets it
+   right away.  If that settles every variable, returns a null pointer.
+   Otherwise, creates and returns a guesser for the remaining variables, which
+   the caller should hand to measure_guesser_run() to guess their levels from
+   data and eventually release with measure_guesser_destroy(). */
+struct measure_guesser *
+measure_guesser_create (struct dataset *ds)
+{
+  struct dictionary *dict = dataset_dict (ds);
+  return measure_guesser_create__ (dict);
+}
+
+/* Adds data from case C to MG.  For every still-undecided variable, feeds its
+   value in C to mg_var_add_value().  When that decides the variable's level,
+   sets the level on the variable and removes the variable from MG. */
+static void
+measure_guesser_add_case (struct measure_guesser *mg, const struct ccase *c)
+{
+ /* 'i' advances only when the current entry is kept; see below. */
+ for (size_t i = 0; i < mg->n_vars; )
+ {
+ struct mg_var *mgv = &mg->vars[i];
+ double value = case_num (c, mgv->var);
+ enum measure m = mg_var_add_value (mgv, value);
+ if (m != MEASURE_UNKNOWN)
+ {
+ var_set_measure (mgv->var, m);
+
+ /* Remove this entry by overwriting it with the last one.  The moved
+    entry now sits at index 'i' and is examined on the next
+    iteration, which is why 'i' is not incremented here. */
+ mg_var_uninit (mgv);
+ *mgv = mg->vars[--mg->n_vars];
+ }
+ else
+ i++;
+ }
+}
+
+/* Destroys MG.  Before freeing, assigns each still-undecided variable the
+   level that mg_var_interpret() derives from the values recorded so far, so
+   destroying a guesser also finalizes its remaining variables.  Does nothing
+   if MG is a null pointer. */
+void
+measure_guesser_destroy (struct measure_guesser *mg)
+{
+ if (!mg)
+ return;
+
+ for (size_t i = 0; i < mg->n_vars; i++)
+ {
+ struct mg_var *mgv = &mg->vars[i];
+ var_set_measure (mgv->var, mg_var_interpret (mgv));
+ mg_var_uninit (mgv);
+ }
+ free (mg->vars);
+ free (mg);
+}
+
+/* Assigns a final measurement level to every variable still undecided in MG,
+   using mg_var_interpret() on the values recorded for it.  Meant to be called
+   once all the cases have been added.  MG itself is left intact. */
+static void
+measure_guesser_commit (struct measure_guesser *mg)
+{
+  size_t remaining = mg->n_vars;
+  for (const struct mg_var *mgv = mg->vars; remaining > 0; remaining--, mgv++)
+    var_set_measure (mgv->var, mg_var_interpret (mgv));
+}
+
+/* Passes the cases in READER through MG and uses the data in the cases to set
+   measurement levels for the variables where they were still unknown.
+
+   READER itself is not consumed: a clone of it is read and then destroyed.
+   Reading stops early once no undecided variables remain.  Afterward, any
+   variables that are still undecided get the level chosen by
+   measure_guesser_commit().
+
+   MG may be a null pointer, which is what measure_guesser_create() returns
+   when there is nothing to guess; in that case this function does nothing,
+   matching measure_guesser_destroy(). */
+void
+measure_guesser_run (struct measure_guesser *mg,
+                     const struct casereader *reader)
+{
+  if (!mg)
+    return;
+
+  struct casereader *r = casereader_clone (reader);
+  while (mg->n_vars > 0)
+    {
+      struct ccase *c = casereader_read (r);
+      if (!c)
+        break;
+      measure_guesser_add_case (mg, c);
+      case_unref (c);
+    }
+  casereader_destroy (r);
+
+  measure_guesser_commit (mg);
+}
+\f
+/* A transformation for guessing measurement levels. */
+
+/* Transformation callback: feeds case *C to the measure guesser MG_.  The
+   case is only read, and the result is always TRNS_CONTINUE. */
+static enum trns_result
+mg_trns_proc (void *mg_, struct ccase **c, casenumber case_nr UNUSED)
+{
+  measure_guesser_add_case (mg_, *c);
+  return TRNS_CONTINUE;
+}
+
+/* Destroy callback for the measurement-level transformation: assigns final
+   levels to the variables still undecided in MG_, then frees the guesser.
+   Always returns true.
+
+   NOTE(review): measure_guesser_destroy() as written in this file also
+   assigns the interpreted level to each remaining variable, so the explicit
+   commit here appears redundant. */
+static bool
+mg_trns_free (void *mg_)
+{
+ struct measure_guesser *mg = mg_;
+ measure_guesser_commit (mg);
+ measure_guesser_destroy (mg);
+ return true;
+}
+
+/* Class of the transformation that feeds every case to a measure guesser.
+   cancel_measurement_level_trns() compares against this object's address to
+   recognize such a transformation at the end of a chain. */
+static const struct trns_class mg_trns_class = {
+ .name = "add measurement level",
+ .execute = mg_trns_proc,
+ .destroy = mg_trns_free,
+};
+
+/* Creates a measure guesser for DICT and, if one is needed, appends a
+   transformation to DS that feeds it every case.  Adds nothing when
+   measure_guesser_create__() returns a null pointer, which means no variable
+   in DICT needs its level guessed from data. */
+static void
+add_measurement_level_trns (struct dataset *ds, struct dictionary *dict)
+{
+  struct measure_guesser *guesser = measure_guesser_create__ (dict);
+  if (guesser == NULL)
+    return;
+
+  add_transformation (ds, &mg_trns_class, guesser);
+}
+
+/* If the last transformation in CHAIN is a measurement-level transformation
+   (its class is mg_trns_class), destroys its guesser and drops it from the
+   chain.  Does nothing if CHAIN is empty or ends in some other kind of
+   transformation.  Only the final element is examined. */
+static void
+cancel_measurement_level_trns (struct trns_chain *chain)
+{
+ if (!chain->n)
+ return;
+
+ struct transformation *trns = &chain->xforms[chain->n - 1];
+ if (trns->class != &mg_trns_class)
+ return;
+
+ /* The guesser is destroyed directly rather than through the class's
+    destroy callback.  Note that measure_guesser_destroy() still assigns
+    interpreted levels to the guesser's remaining variables. */
+ struct measure_guesser *mg = trns->aux;
+ measure_guesser_destroy (mg);
+ chain->n--;
+}
+\f
static void
dataset_changed__ (struct dataset *ds)
{