From 899ca177810116bc0ff1c4bcce9368d79cd71d42 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 29 Jul 2022 14:09:59 -0700 Subject: [PATCH] Automatically infer variables' measurement level from format and data. --- NEWS | 4 + doc/dev/system-file-format.texi | 18 +- doc/language.texi | 69 +++++ doc/utilities.texi | 6 + src/data/dataset.c | 268 +++++++++++++++++- src/data/dataset.h | 4 + src/data/settings.c | 14 + src/data/settings.h | 3 + src/data/variable.c | 39 ++- src/data/variable.h | 10 +- src/language/data-io/matrix-data.c | 7 +- src/language/stats/rank.c | 11 + src/ui/gui/psppire-dictview.c | 3 + src/ui/gui/psppire-import-textfile.c | 3 +- tests/data/pc+-file-reader.at | 50 ++-- tests/data/por-file.at | 21 +- tests/data/sys-file-reader.at | 98 +++---- tests/data/sys-file.at | 2 +- .../language/data-io/get-data-spreadsheet.at | 15 +- tests/language/data-io/matrix-data.at | 8 +- tests/language/dictionary/apply.at | 8 +- tests/language/dictionary/sys-file-info.at | 2 +- tests/language/dictionary/value-labels.at | 8 +- tests/language/dictionary/variable-display.at | 93 ++++++ tests/language/stats/autorecode.at | 2 +- tests/language/stats/rank.at | 20 +- tests/perl-module.at | 4 +- 27 files changed, 653 insertions(+), 137 deletions(-) diff --git a/NEWS b/NEWS index 13ab59c643..d6c30dd4f4 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,10 @@ Changes after 1.6.2: Changes from 1.6.1 to 1.6.2: + * Previously, numeric variables defaulted to "scale" measurement + level. Now, the default is "unknown", with a better guess chosen + based on heuristics the first time the data is read. + * Bug fixes. 
Changes from 1.6.0 to 1.6.1: diff --git a/doc/dev/system-file-format.texi b/doc/dev/system-file-format.texi index ca7c668287..17f5c1450f 100644 --- a/doc/dev/system-file-format.texi +++ b/doc/dev/system-file-format.texi @@ -1001,18 +1001,24 @@ members are as follows: @table @code @item int32 measure; -The measurement type of the variable: +The measurement level of the variable: @table @asis +@item 0 +Unknown @item 1 -Nominal Scale +Nominal @item 2 -Ordinal Scale +Ordinal @item 3 -Continuous Scale +Scale @end table -SPSS sometimes writes a @code{measure} of 0. PSPP interprets this as -nominal scale. +An ``unknown'' @code{measure} of 0 means that the variable was created +in some way that doesn't make the measurement level clear, e.g.@: with +a @code{COMPUTE} transformation. PSPP sets the measurement level the +first time it reads the data using the rules documented in +@ref{Measurement Level,,,pspp, PSPP Users Guide}, so this should +rarely appear. @item int32 width; The width of the display column for the variable in characters. diff --git a/doc/language.texi b/doc/language.texi index 8b8b7fa118..7b750eb7ae 100644 --- a/doc/language.texi +++ b/doc/language.texi @@ -507,6 +507,75 @@ they are displayed. Example: a width of 8, with 2 decimal places. Similar to print format, but used by the @cmd{WRITE} command (@pxref{WRITE}). +@cindex measurement level +@item Measurement level +@anchor{Measurement Level} +One of the following: + +@table @asis +@item Nominal +Each value of a nominal variable represents a distinct category. The +possible categories are finite and often have value labels. The order +of categories is not significant. Political parties, US states, and +yes/no choices are nominal. Numeric and string variables can be +nominal. + +@item Ordinal +Ordinal variables also represent distinct categories, but their values +are arranged according to some natural order. Likert scales, e.g.@: +from strongly disagree to strongly agree, are ordinal. 
Data grouped +into ranges, e.g.@: age groups or income groups, are ordinal. Both +numeric and string variables can be ordinal. String values are +ordered alphabetically, so letter grades from A to F will work as +expected, but @code{poor}, @code{satisfactory}, @code{excellent} will +not. + +@item Scale +Scale variables are ones for which differences and ratios are +meaningful. These are often values which have a natural unit +attached, such as age in years, income in dollars, or distance in +miles. Only numeric variables can be scale variables. +@end table + +Variables created by @cmd{COMPUTE} and similar transformations, +obtained from external sources, etc., initially have an unknown +measurement level. Any procedure that reads the data will then assign +a default measurement level. @pspp{} can assign some defaults without +reading the data: + +@itemize @bullet +@item +Nominal, if it's a string variable. + +@item +Nominal, if the variable has a WKDAY or MONTH print format. + +@item +Scale, if the variable has a DOLLAR, CCA through CCE, or time or date +print format. +@end itemize + +Otherwise, @pspp{} reads the data and decides based on its +distribution: + +@itemize @bullet +@item +Nominal, if all observations are missing. + +@item +Scale, if one or more valid observations are noninteger or negative. + +@item +Scale, if no valid observation is less than 10. + +@item +Scale, if the variable has 24 or more unique valid values. The value +24 is the default and can be adjusted (@pxref{SET SCALEMIN}). +@end itemize + +Finally, if none of the above is true, @pspp{} assigns the variable a +nominal measurement level. + @cindex custom attributes @item Custom attributes User-defined associations between names and values. 
@xref{VARIABLE diff --git a/doc/utilities.texi b/doc/utilities.texi index dcbfde1481..4cad836ad1 100644 --- a/doc/utilities.texi +++ b/doc/utilities.texi @@ -501,6 +501,7 @@ SET /SEED=@{RANDOM,@var{seed_value}@} /UNDEFINED=@{WARN,NOWARN@} /FUZZBITS=@var{fuzzbits} + /SCALEMIN=@var{count} (data output) /CC@{A,B,C,D,E@}=@{'@var{npre},@var{pre},@var{suf},@var{nsuf}','@var{npre}.@var{pre}.@var{suf}.@var{nsuf}'@} @@ -713,6 +714,11 @@ to accept for rounding up a value that is almost halfway between two possibilities for rounding with the RND operator (@pxref{Miscellaneous Mathematics}). The default @var{fuzzbits} is 6. +@item SCALEMIN +@anchor{SET SCALEMIN} +The minimum number of distinct valid values for @pspp{} to assume that +a variable has a scale measurement level. The default @var{count} is 24. @xref{Measurement Level}. + @item WORKSPACE The maximum amount of memory (in kilobytes) that @pspp{} uses to store data being processed. If memory in excess of the workspace size diff --git a/src/data/dataset.c b/src/data/dataset.c index cbff74088b..450383fb87 100644 --- a/src/data/dataset.c +++ b/src/data/dataset.c @@ -36,6 +36,8 @@ #include "data/transformations.h" #include "data/variable.h" #include "libpspp/deque.h" +#include "libpspp/hash-functions.h" +#include "libpspp/hmap.h" #include "libpspp/misc.h" #include "libpspp/str.h" #include "libpspp/taint.h" @@ -116,6 +118,8 @@ static void dataset_changed__ (struct dataset *); static void dataset_transformations_changed__ (struct dataset *, bool non_empty); +static void add_measurement_level_trns (struct dataset *, struct dictionary *); +static void cancel_measurement_level_trns (struct trns_chain *); static void add_case_limit_trns (struct dataset *ds); static void add_filter_trns (struct dataset *ds); @@ -177,6 +181,7 @@ dataset_clone (struct dataset *old, const char *name) assert (old->sink == NULL); assert (!old->temporary); assert (!old->temporary_trns_chain.n); + assert (!old->n_stack); new = xzalloc (sizeof *new); new->name = xstrdup (name); @@ 
-421,6 +426,7 @@ proc_open_filtering (struct dataset *ds, bool filter) { struct casereader *reader; + assert (ds->n_stack == 0); assert (ds->source != NULL); assert (ds->proc_state == PROC_COMMITTED); @@ -432,6 +438,8 @@ proc_open_filtering (struct dataset *ds, bool filter) add_case_limit_trns (ds); if (filter) add_filter_trns (ds); + if (!proc_in_temporary_transformations (ds)) + add_measurement_level_trns (ds, ds->dict); /* Make permanent_dict refer to the dictionary right before data reaches the sink. */ @@ -609,6 +617,7 @@ proc_commit (struct dataset *ds) /* Dictionary from before TEMPORARY becomes permanent. */ proc_cancel_temporary_transformations (ds); + bool ok = proc_cancel_all_transformations (ds) && ds->ok; if (!ds->discard_output) { @@ -638,7 +647,7 @@ proc_commit (struct dataset *ds) dict_clear_vectors (ds->dict); ds->permanent_dict = NULL; - return proc_cancel_all_transformations (ds) && ds->ok; + return ok; } /* Casereader class for procedure execution. */ @@ -699,11 +708,13 @@ proc_in_temporary_transformations (const struct dataset *ds) void proc_start_temporary_transformations (struct dataset *ds) { + assert (!ds->n_stack); if (!proc_in_temporary_transformations (ds)) { add_case_limit_trns (ds); ds->permanent_dict = dict_clone (ds->dict); + add_measurement_level_trns (ds, ds->permanent_dict); ds->temporary = true; dataset_transformations_changed__ (ds, true); @@ -723,6 +734,7 @@ proc_make_temporary_transformations_permanent (struct dataset *ds) { if (proc_in_temporary_transformations (ds)) { + cancel_measurement_level_trns (&ds->permanent_trns_chain); trns_chain_splice (&ds->permanent_trns_chain, &ds->temporary_trns_chain); ds->temporary = false; @@ -744,12 +756,12 @@ proc_cancel_temporary_transformations (struct dataset *ds) { if (proc_in_temporary_transformations (ds)) { + trns_chain_clear (&ds->temporary_trns_chain); + dict_unref (ds->dict); ds->dict = ds->permanent_dict; ds->permanent_dict = NULL; - trns_chain_clear (&ds->temporary_trns_chain); 
- dataset_transformations_changed__ (ds, ds->permanent_trns_chain.n != 0); return true; } @@ -938,6 +950,256 @@ dataset_need_lag (struct dataset *ds, int n_before) ds->n_lag = MAX (ds->n_lag, n_before); } +/* Measurement guesser, for guessing a measurement level from formats and + data. */ + +struct mg_value + { + struct hmap_node hmap_node; + double value; + }; + +struct mg_var + { + struct variable *var; + struct hmap *values; + }; + +static void +mg_var_uninit (struct mg_var *mgv) +{ + struct mg_value *mgvalue, *next; + HMAP_FOR_EACH_SAFE (mgvalue, next, struct mg_value, hmap_node, + mgv->values) + { + hmap_delete (mgv->values, &mgvalue->hmap_node); + free (mgvalue); + } + hmap_destroy (mgv->values); + free (mgv->values); +} + +static enum measure +mg_var_interpret (const struct mg_var *mgv) +{ + size_t n = hmap_count (mgv->values); + if (!n) + { + /* All missing (or no data). */ + return MEASURE_NOMINAL; + } + + const struct mg_value *mgvalue; + HMAP_FOR_EACH (mgvalue, struct mg_value, hmap_node, + mgv->values) + if (mgvalue->value < 10) + return MEASURE_NOMINAL; + return MEASURE_SCALE; +} + +static enum measure +mg_var_add_value (struct mg_var *mgv, double value) +{ + if (var_is_num_missing (mgv->var, value)) + return MEASURE_UNKNOWN; + else if (value < 0 || value != floor (value)) + return MEASURE_SCALE; + + size_t hash = hash_double (value, 0); + struct mg_value *mgvalue; + HMAP_FOR_EACH_WITH_HASH (mgvalue, struct mg_value, hmap_node, + hash, mgv->values) + if (mgvalue->value == value) + return MEASURE_UNKNOWN; + + mgvalue = xmalloc (sizeof *mgvalue); + mgvalue->value = value; + hmap_insert (mgv->values, &mgvalue->hmap_node, hash); + if (hmap_count (mgv->values) >= settings_get_scalemin ()) + return MEASURE_SCALE; + + return MEASURE_UNKNOWN; +} + +struct measure_guesser + { + struct mg_var *vars; + size_t n_vars; + }; + +static struct measure_guesser * +measure_guesser_create__ (struct dictionary *dict) +{ + struct mg_var *mgvs = NULL; + size_t n_mgvs = 0; + 
size_t allocated_mgvs = 0; + + for (size_t i = 0; i < dict_get_n_vars (dict); i++) + { + struct variable *var = dict_get_var (dict, i); + if (var_get_measure (var) != MEASURE_UNKNOWN) + continue; + + const struct fmt_spec *f = var_get_print_format (var); + enum measure m = var_default_measure_for_format (f->type); + if (m != MEASURE_UNKNOWN) + { + var_set_measure (var, m); + continue; + } + + if (n_mgvs >= allocated_mgvs) + mgvs = x2nrealloc (mgvs, &allocated_mgvs, sizeof *mgvs); + + struct mg_var *mgv = &mgvs[n_mgvs++]; + *mgv = (struct mg_var) { + .var = var, + .values = xmalloc (sizeof *mgv->values), + }; + hmap_init (mgv->values); + } + if (!n_mgvs) + return NULL; + + struct measure_guesser *mg = xmalloc (sizeof *mg); + *mg = (struct measure_guesser) { + .vars = mgvs, + .n_vars = n_mgvs, + }; + return mg; +} + +/* Scans through DS's dictionary for variables that have an unknown measurement + level. For those, if the measurement level can be guessed based on the + variable's type and format, sets a default. If that's enough, returns NULL. + If any remain whose levels are unknown and can't be guessed that way, + creates and returns a structure that the caller should pass to + measure_guesser_add_case() or measure_guesser_run() for guessing a + measurement level based on the data. */ +struct measure_guesser * +measure_guesser_create (struct dataset *ds) +{ + return measure_guesser_create__ (dataset_dict (ds)); +} + +/* Adds data from case C to MG. */ +static void +measure_guesser_add_case (struct measure_guesser *mg, const struct ccase *c) +{ + for (size_t i = 0; i < mg->n_vars; ) + { + struct mg_var *mgv = &mg->vars[i]; + double value = case_num (c, mgv->var); + enum measure m = mg_var_add_value (mgv, value); + if (m != MEASURE_UNKNOWN) + { + var_set_measure (mgv->var, m); + + mg_var_uninit (mgv); + *mgv = mg->vars[--mg->n_vars]; + } + else + i++; + } +} + +/* Destroys MG. 
*/ +void +measure_guesser_destroy (struct measure_guesser *mg) +{ + if (!mg) + return; + + for (size_t i = 0; i < mg->n_vars; i++) + { + struct mg_var *mgv = &mg->vars[i]; + var_set_measure (mgv->var, mg_var_interpret (mgv)); + mg_var_uninit (mgv); + } + free (mg->vars); + free (mg); +} + +/* Adds final measurement levels based on MG, after all the cases have been + added. */ +static void +measure_guesser_commit (struct measure_guesser *mg) +{ + for (size_t i = 0; i < mg->n_vars; i++) + { + struct mg_var *mgv = &mg->vars[i]; + var_set_measure (mgv->var, mg_var_interpret (mgv)); + } +} + +/* Passes the cases in READER through MG and uses the data in the cases to set + measurement levels for the variables where they were still unknown. */ +void +measure_guesser_run (struct measure_guesser *mg, + const struct casereader *reader) +{ + struct casereader *r = casereader_clone (reader); + while (mg->n_vars > 0) + { + struct ccase *c = casereader_read (r); + if (!c) + break; + measure_guesser_add_case (mg, c); + case_unref (c); + } + casereader_destroy (r); + + measure_guesser_commit (mg); +} + +/* A transformation for guessing measurement levels. 
*/ + +static enum trns_result +mg_trns_proc (void *mg_, struct ccase **c, casenumber case_nr UNUSED) +{ + struct measure_guesser *mg = mg_; + measure_guesser_add_case (mg, *c); + return TRNS_CONTINUE; +} + +static bool +mg_trns_free (void *mg_) +{ + struct measure_guesser *mg = mg_; + measure_guesser_commit (mg); + measure_guesser_destroy (mg); + return true; +} + +static const struct trns_class mg_trns_class = { + .name = "add measurement level", + .execute = mg_trns_proc, + .destroy = mg_trns_free, +}; + +static void +add_measurement_level_trns (struct dataset *ds, struct dictionary *dict) +{ + struct measure_guesser *mg = measure_guesser_create__ (dict); + if (mg) + add_transformation (ds, &mg_trns_class, mg); +} + +static void +cancel_measurement_level_trns (struct trns_chain *chain) +{ + if (!chain->n) + return; + + struct transformation *trns = &chain->xforms[chain->n - 1]; + if (trns->class != &mg_trns_class) + return; + + struct measure_guesser *mg = trns->aux; + measure_guesser_destroy (mg); + chain->n--; +} + static void dataset_changed__ (struct dataset *ds) { diff --git a/src/data/dataset.h b/src/data/dataset.h index 1fe535536f..dfa444356e 100644 --- a/src/data/dataset.h +++ b/src/data/dataset.h @@ -105,6 +105,10 @@ bool proc_commit (struct dataset *); bool dataset_end_of_command (struct dataset *); +struct measure_guesser *measure_guesser_create (struct dataset *); +void measure_guesser_run (struct measure_guesser *, const struct casereader *); +void measure_guesser_destroy (struct measure_guesser *); + const struct ccase *lagged_case (const struct dataset *ds, int n_before); void dataset_need_lag (struct dataset *ds, int n_before); diff --git a/src/data/settings.c b/src/data/settings.c index 7d7934af92..c70635cec0 100644 --- a/src/data/settings.c +++ b/src/data/settings.c @@ -74,6 +74,7 @@ struct settings struct fmt_spec default_format; bool testing_mode; int fuzzbits; + int scalemin; int cmd_algorithm; int global_algorithm; @@ -122,6 +123,7 @@ static 
struct settings the_settings = { .default_format = { .type = FMT_F, .w = 8, .d = 2 }, .testing_mode = false, .fuzzbits = 6, + .scalemin = 24, .cmd_algorithm = ENHANCED, .global_algorithm = ENHANCED, .syntax = ENHANCED, @@ -548,6 +550,18 @@ settings_set_fuzzbits (int fuzzbits) the_settings.fuzzbits = fuzzbits; } +int +settings_get_scalemin (void) +{ + return the_settings.scalemin; +} + +void +settings_set_scalemin (int scalemin) +{ + the_settings.scalemin = scalemin; +} + /* Return the current algorithm setting */ enum behavior_mode settings_get_algorithm (void) diff --git a/src/data/settings.h b/src/data/settings.h index 34f4b8519f..6e412c4675 100644 --- a/src/data/settings.h +++ b/src/data/settings.h @@ -116,6 +116,9 @@ void settings_set_testing_mode (bool); int settings_get_fuzzbits (void); void settings_set_fuzzbits (int); +int settings_get_scalemin (void); +void settings_set_scalemin (int); + /* Whether to show variable or value labels or the underlying value or variable name. */ enum ATTRIBUTE ((packed)) settings_value_show diff --git a/src/data/variable.c b/src/data/variable.c index 8acac77fb2..2e584fe86f 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -63,6 +63,7 @@ const GEnumValue align[] = const GEnumValue measure[] = { + {MEASURE_UNKNOWN, "unknown", N_("Unknown")}, {MEASURE_NOMINAL, "nominal", N_("Nominal")}, {MEASURE_ORDINAL, "ordinal", N_("Ordinal")}, {MEASURE_SCALE, "scale", N_("Scale")}, @@ -139,7 +140,7 @@ var_create (const char *name, int width) v->leave = var_must_leave (v); type = val_type_from_width (width); v->alignment = var_default_alignment (type); - v->measure = var_default_measure (type); + v->measure = var_default_measure_for_type (type); v->role = ROLE_INPUT; v->display_width = var_default_display_width (width); v->print = v->write = var_default_formats (width); @@ -809,7 +810,8 @@ var_has_label (const struct variable *v) bool measure_is_valid (enum measure m) { - return m == MEASURE_NOMINAL || m == MEASURE_ORDINAL || m == 
MEASURE_SCALE; + return (m == MEASURE_UNKNOWN || m == MEASURE_NOMINAL + || m == MEASURE_ORDINAL || m == MEASURE_SCALE); } /* Returns a string version of measurement level M, for display to a user. @@ -873,9 +875,38 @@ var_set_measure (struct variable *v, enum measure measure) used to reset a variable's measurement level to the default. */ enum measure -var_default_measure (enum val_type type) +var_default_measure_for_type (enum val_type type) { - return type == VAL_NUMERIC ? MEASURE_SCALE : MEASURE_NOMINAL; + return type == VAL_NUMERIC ? MEASURE_UNKNOWN : MEASURE_NOMINAL; +} + +/* Returns the default measurement level for a variable with the given + FORMAT, or MEASURE_UNKNOWN if there is no good default. */ +enum measure +var_default_measure_for_format (enum fmt_type format) +{ + if (format == FMT_DOLLAR) + return MEASURE_SCALE; + + switch (fmt_get_category (format)) + { + case FMT_CAT_BASIC: + case FMT_CAT_LEGACY: + case FMT_CAT_BINARY: + case FMT_CAT_HEXADECIMAL: + return MEASURE_UNKNOWN; + + case FMT_CAT_CUSTOM: + case FMT_CAT_DATE: + case FMT_CAT_TIME: + return MEASURE_SCALE; + + case FMT_CAT_DATE_COMPONENT: + case FMT_CAT_STRING: + return MEASURE_NOMINAL; + } + + NOT_REACHED (); } /* Returns true if M is a valid variable role, diff --git a/src/data/variable.h b/src/data/variable.h index 2fa6866786..b5562ba827 100644 --- a/src/data/variable.h +++ b/src/data/variable.h @@ -127,9 +127,10 @@ bool var_has_label (const struct variable *); /* How data is measured. 
*/ enum measure { - MEASURE_NOMINAL = 0, - MEASURE_ORDINAL = 1, - MEASURE_SCALE = 2, + MEASURE_UNKNOWN = 0, + MEASURE_NOMINAL = 1, + MEASURE_ORDINAL = 2, + MEASURE_SCALE = 3, n_MEASURES }; @@ -140,7 +141,8 @@ const char *measure_to_syntax (enum measure); enum measure var_get_measure (const struct variable *); void var_set_measure (struct variable *, enum measure); -enum measure var_default_measure (enum val_type); +enum measure var_default_measure_for_type (enum val_type); +enum measure var_default_measure_for_format (enum fmt_type); /* Intended usage of a variable, for populating dialogs. */ enum var_role diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index df8647480f..f741b4d54c 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -847,7 +847,10 @@ parse_matrix_data_variables (struct lexer *lexer) if (!strcasecmp (names[i], "ROWTYPE_")) dict_create_var_assert (dict, "ROWTYPE_", 8); else - dict_create_var_assert (dict, names[i], 0); + { + struct variable *var = dict_create_var_assert (dict, names[i], 0); + var_set_measure (var, MEASURE_SCALE); + } for (size_t i = 0; i < n_names; ++i) free (names[i]); @@ -891,6 +894,7 @@ parse_matrix_data_subvars (struct lexer *lexer, struct dictionary *dict, } *tv = true; + var_set_measure (v, MEASURE_NOMINAL); var_set_both_formats (v, &(struct fmt_spec) { .type = FMT_F, .w = 4 }); } return true; @@ -1007,6 +1011,7 @@ cmd_matrix_data (struct lexer *lexer, struct dataset *ds) mf.svars = xmalloc (sizeof *mf.svars); mf.svars[0] = dict_create_var_assert (dict, lex_tokcstr (lexer), 0); + var_set_measure (mf.svars[0], MEASURE_NOMINAL); var_set_both_formats ( mf.svars[0], &(struct fmt_spec) { .type = FMT_F, .w = 4 }); mf.n_svars = 1; diff --git a/src/language/stats/rank.c b/src/language/stats/rank.c index af0eb21538..ad113676cb 100644 --- a/src/language/stats/rank.c +++ b/src/language/stats/rank.c @@ -123,6 +123,16 @@ static const rank_function_t 
rank_func[n_RANK_FUNCS] = { rank_savage }; +static enum measure rank_measures[n_RANK_FUNCS] = { + [RANK] = MEASURE_ORDINAL, + [NORMAL] = MEASURE_ORDINAL, + [PERCENT] = MEASURE_ORDINAL, + [RFRACTION] = MEASURE_ORDINAL, + [PROPORTION] = MEASURE_ORDINAL, + [N] = MEASURE_SCALE, + [NTILES] = MEASURE_ORDINAL, + [SAVAGE] = MEASURE_ORDINAL, +}; enum ties { @@ -1116,6 +1126,7 @@ rank_cmd (struct dataset *ds, const struct rank *cmd) var = dict_create_var_assert (d, rs->dest_names[i], 0); var_set_both_formats (var, &dest_format[rs->rfunc]); var_set_label (var, rs->dest_labels[i]); + var_set_measure (var, rank_measures[rs->rfunc]); iv->output_vars[j] = var; } diff --git a/src/ui/gui/psppire-dictview.c b/src/ui/gui/psppire-dictview.c index 8a976c0424..5e998fc6f0 100644 --- a/src/ui/gui/psppire-dictview.c +++ b/src/ui/gui/psppire-dictview.c @@ -400,6 +400,7 @@ get_var_measurement_stock_id (enum fmt_type type, enum measure measure) case FMT_CAT_STRING: switch (measure) { + case MEASURE_UNKNOWN: return "role-none"; case MEASURE_NOMINAL: return "measure-string-nominal"; case MEASURE_ORDINAL: return "measure-string-ordinal"; case MEASURE_SCALE: return "role-none"; @@ -411,6 +412,7 @@ get_var_measurement_stock_id (enum fmt_type type, enum measure measure) case FMT_CAT_TIME: switch (measure) { + case MEASURE_UNKNOWN: return "role-none"; case MEASURE_NOMINAL: return "measure-date-nominal"; case MEASURE_ORDINAL: return "measure-date-ordinal"; case MEASURE_SCALE: return "measure-date-scale"; @@ -421,6 +423,7 @@ get_var_measurement_stock_id (enum fmt_type type, enum measure measure) default: switch (measure) { + case MEASURE_UNKNOWN: return "role-none"; case MEASURE_NOMINAL: return "measure-nominal"; case MEASURE_ORDINAL: return "measure-ordinal"; case MEASURE_SCALE: return "measure-scale"; diff --git a/src/ui/gui/psppire-import-textfile.c b/src/ui/gui/psppire-import-textfile.c index ec53d175ed..1b6a4d65bd 100644 --- a/src/ui/gui/psppire-import-textfile.c +++ 
b/src/ui/gui/psppire-import-textfile.c @@ -858,7 +858,8 @@ apply_dict (const struct dictionary *dict, struct string *s) if (var_has_label (var)) syntax_gen_pspp (s, "VARIABLE LABELS %ss %sq.\n", name, var_get_label (var)); - if (measure != var_default_measure (type)) + if (measure != var_default_measure_for_type (type) + && measure != MEASURE_UNKNOWN) syntax_gen_pspp (s, "VARIABLE LEVEL %ss (%ss).\n", name, measure_to_syntax (measure)); if (role != ROLE_INPUT) diff --git a/tests/data/pc+-file-reader.at b/tests/data/pc+-file-reader.at index 9d4357b146..115b2f953f 100644 --- a/tests/data/pc+-file-reader.at +++ b/tests/data/pc+-file-reader.at @@ -110,10 +110,10 @@ Label,PSPP synthetic test file Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format,Missing Values -NUM1,1,,Scale,Input,8,Right,F8.0,F8.0, -NUM2,2,Numeric variable 2's label,Scale,Input,8,Right,F8.0,F8.0, -NUM3,3,,Scale,Input,8,Right,F8.0,F8.0,1 -NUM4,4,Another numeric variable label,Scale,Input,8,Right,F8.0,F8.0,2 +NUM1,1,,Unknown,Input,8,Right,F8.0,F8.0, +NUM2,2,Numeric variable 2's label,Unknown,Input,8,Right,F8.0,F8.0, +NUM3,3,,Unknown,Input,8,Right,F8.0,F8.0,1 +NUM4,4,Another numeric variable label,Unknown,Input,8,Right,F8.0,F8.0,2 STR1,5,,Nominal,Input,8,Left,A8,A8, STR2,6,STR2's variable label,Nominal,Input,4,Left,A4,A4, STR3,7,,Nominal,Input,5,Left,A5,A5,"""MISS """ @@ -223,11 +223,11 @@ Label,PSPP synthetic test file Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -NUM1,1,Scale,Input,8,Right,F8.0,F8.0 -NUM2,2,Scale,Input,8,Right,F8.0,F8.0 -NUM3,3,Scale,Input,8,Right,F8.0,F8.0 -NUM4,4,Scale,Input,8,Right,F8.0,F8.0 -NUM5,5,Scale,Input,8,Right,F8.0,F8.0 +NUM1,1,Unknown,Input,8,Right,F8.0,F8.0 +NUM2,2,Unknown,Input,8,Right,F8.0,F8.0 +NUM3,3,Unknown,Input,8,Right,F8.0,F8.0 +NUM4,4,Unknown,Input,8,Right,F8.0,F8.0 +NUM5,5,Unknown,Input,8,Right,F8.0,F8.0 STR1,6,Nominal,Input,1,Left,A1,A1 
STR2,7,Nominal,Input,2,Left,A2,A2 STR3,8,Nominal,Input,3,Left,A3,A3 @@ -337,8 +337,8 @@ Label,PSPP synthetic test file Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -NUM1,1,Scale,Input,8,Right,F8.0,F8.0 -NUM2,2,Scale,Input,8,Right,F8.0,F8.0 +NUM1,1,Unknown,Input,8,Right,F8.0,F8.0 +NUM2,2,Unknown,Input,8,Right,F8.0,F8.0 STR4,3,Nominal,Input,4,Left,A4,A4 STR8,4,Nominal,Input,8,Left,A8,A8 STR15,5,Nominal,Input,15,Left,A15,A15 @@ -413,10 +413,10 @@ Encoding,us-ascii Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -NUM1,1,Scale,Input,8,Right,F8.0,F8.0 -NUM2,2,Scale,Input,8,Right,F8.0,F8.0 -NUM3,3,Scale,Input,8,Right,F8.0,F8.0 -NUM4,4,Scale,Input,8,Right,F8.0,F8.0 +NUM1,1,Unknown,Input,8,Right,F8.0,F8.0 +NUM2,2,Unknown,Input,8,Right,F8.0,F8.0 +NUM3,3,Unknown,Input,8,Right,F8.0,F8.0 +NUM4,4,Unknown,Input,8,Right,F8.0,F8.0 ]) AT_CLEANUP @@ -483,10 +483,10 @@ Label,PSPP synthetic test file Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -NUM1,1,Scale,Input,8,Right,F8.0,F8.0 -NUM2,2,Scale,Input,8,Right,F8.0,F8.0 -NUM3,3,Scale,Input,8,Right,F8.0,F8.0 -NUM4,4,Scale,Input,8,Right,F8.0,F8.0 +NUM1,1,Unknown,Input,8,Right,F8.0,F8.0 +NUM2,2,Unknown,Input,8,Right,F8.0,F8.0 +NUM3,3,Unknown,Input,8,Right,F8.0,F8.0 +NUM4,4,Unknown,Input,8,Right,F8.0,F8.0 Table: Data List NUM1,NUM2,NUM3,NUM4 @@ -507,10 +507,10 @@ Encoding,us-ascii Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -NUM1,1,Scale,Input,8,Right,F8.0,F8.0 -NUM2,2,Scale,Input,8,Right,F8.0,F8.0 -NUM3,3,Scale,Input,8,Right,F8.0,F8.0 -NUM4,4,Scale,Input,8,Right,F8.0,F8.0 +NUM1,1,Unknown,Input,8,Right,F8.0,F8.0 +NUM2,2,Unknown,Input,8,Right,F8.0,F8.0 +NUM3,3,Unknown,Input,8,Right,F8.0,F8.0 +NUM4,4,Unknown,Input,8,Right,F8.0,F8.0 ]) AT_CLEANUP @@ -1251,8 +1251,8 @@ Label,PSPP synthetic test file Table: Variables Name,Position,Measurement 
Level,Role,Width,Alignment,Print Format,Write Format -NUM1,1,Scale,Input,8,Right,F8.0,F8.0 -NUM2,2,Scale,Input,8,Right,F8.0,F8.0 +NUM1,1,Unknown,Input,8,Right,F8.0,F8.0 +NUM2,2,Unknown,Input,8,Right,F8.0,F8.0 STR4,3,Nominal,Input,4,Left,A4,A4 STR8,4,Nominal,Input,8,Left,A8,A8 STR15,5,Nominal,Input,15,Left,A15,A15 diff --git a/tests/data/por-file.at b/tests/data/por-file.at index 9024f89ba2..fb73a50ee2 100644 --- a/tests/data/por-file.at +++ b/tests/data/por-file.at @@ -100,17 +100,18 @@ EXPORT OUTFILE='data.por'. AT_CHECK([pspp -O format=csv export.sps]) AT_DATA([import.sps], [dnl IMPORT FILE='data.por'. +EXECUTE. DISPLAY DICTIONARY. LIST. ]) AT_CHECK([pspp -O format=csv import.sps], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -VAR1,1,Scale,Input,8,Right,F1.0,F1.0 -VAR2,2,Scale,Input,8,Right,F1.0,F1.0 -VAR3,3,Scale,Input,8,Right,F1.0,F1.0 -VAR4,4,Scale,Input,8,Right,F1.0,F1.0 -VAR5,5,Scale,Input,8,Right,F1.0,F1.0 +VAR1,1,Nominal,Input,8,Right,F1.0,F1.0 +VAR2,2,Nominal,Input,8,Right,F1.0,F1.0 +VAR3,3,Nominal,Input,8,Right,F1.0,F1.0 +VAR4,4,Nominal,Input,8,Right,F1.0,F1.0 +VAR5,5,Nominal,Input,8,Right,F1.0,F1.0 Table: Value Labels Variable Value,,Label @@ -142,11 +143,11 @@ Compression,None Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -VAR1,1,Scale,Input,8,Right,F1.0,F1.0 -VAR2,2,Scale,Input,8,Right,F1.0,F1.0 -VAR3,3,Scale,Input,8,Right,F1.0,F1.0 -VAR4,4,Scale,Input,8,Right,F1.0,F1.0 -VAR5,5,Scale,Input,8,Right,F1.0,F1.0 +VAR1,1,Unknown,Input,8,Right,F1.0,F1.0 +VAR2,2,Unknown,Input,8,Right,F1.0,F1.0 +VAR3,3,Unknown,Input,8,Right,F1.0,F1.0 +VAR4,4,Unknown,Input,8,Right,F1.0,F1.0 +VAR5,5,Unknown,Input,8,Right,F1.0,F1.0 Table: Value Labels Variable Value,,Label diff --git a/tests/data/sys-file-reader.at b/tests/data/sys-file-reader.at index dd520feed3..f446631763 100644 --- a/tests/data/sys-file-reader.at +++ b/tests/data/sys-file-reader.at @@ -152,16 +152,16 
@@ Label,PSPP synthetic test file: ôõöø Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format,Missing Values -num1,1,,Scale,Input,8,Right,F8.0,F8.0, -num2,2,Numeric variable 2's label (ùúû),Scale,Input,8,Right,F8.0,F8.0, -num3,3,,Scale,Input,8,Right,F8.0,F8.0,1 -num4,4,Another numeric variable label,Scale,Input,8,Right,F8.0,F8.0,1 -num5,5,,Scale,Input,8,Right,F8.0,F8.0,1; 2 -num6,6,,Scale,Input,8,Right,F8.0,F8.0,1; 2; 3 -num7,7,,Scale,Input,8,Right,F8.0,F8.0,1 THRU 3 -num8,8,,Scale,Input,8,Right,F8.0,F8.0,1 THRU 3; 5 -num9,9,,Scale,Input,8,Right,F8.0,F8.0,1 THRU HIGHEST; -5 -numàèìñò,10,,Scale,Input,8,Right,F8.0,F8.0,LOWEST THRU 1; 5 +num1,1,,Unknown,Input,8,Right,F8.0,F8.0, +num2,2,Numeric variable 2's label (ùúû),Unknown,Input,8,Right,F8.0,F8.0, +num3,3,,Unknown,Input,8,Right,F8.0,F8.0,1 +num4,4,Another numeric variable label,Unknown,Input,8,Right,F8.0,F8.0,1 +num5,5,,Unknown,Input,8,Right,F8.0,F8.0,1; 2 +num6,6,,Unknown,Input,8,Right,F8.0,F8.0,1; 2; 3 +num7,7,,Unknown,Input,8,Right,F8.0,F8.0,1 THRU 3 +num8,8,,Unknown,Input,8,Right,F8.0,F8.0,1 THRU 3; 5 +num9,9,,Unknown,Input,8,Right,F8.0,F8.0,1 THRU HIGHEST; -5 +numàèìñò,10,,Unknown,Input,8,Right,F8.0,F8.0,LOWEST THRU 1; 5 str1,11,,Nominal,Input,4,Left,A4,A4, str2,12,String variable 2's label,Nominal,Input,4,Left,A4,A4, str3,13,,Nominal,Input,4,Left,A4,A4,"""MISS""" @@ -222,8 +222,8 @@ LIST. AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,,Scale,Input,8,Right,F8.0,F8.0 -num2,2,Numeric variable 2's label,Scale,Input,8,Right,F8.0,F8.0 +num1,1,,Unknown,Input,8,Right,F8.0,F8.0 +num2,2,Numeric variable 2's label,Unknown,Input,8,Right,F8.0,F8.0 Table: Data List num1,num2 @@ -276,8 +276,8 @@ LIST. 
AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,,Scale,Input,8,Right,F8.0,F8.0 -num2,2,Numeric variable 2's label,Scale,Input,8,Right,F8.0,F8.0 +num1,1,,Unknown,Input,8,Right,F8.0,F8.0 +num2,2,Numeric variable 2's label,Unknown,Input,8,Right,F8.0,F8.0 Table: Data List num1,num2 @@ -409,11 +409,11 @@ DISPLAY DICTIONARY. AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,Scale,Input,8,Right,F8.0,F8.0 -num2,2,Scale,Input,8,Right,F8.0,F8.0 -num3,3,Scale,Input,8,Right,F8.0,F8.0 -num4,4,Scale,Input,8,Right,F8.0,F8.0 -num5,5,Scale,Input,8,Right,F8.0,F8.0 +num1,1,Unknown,Input,8,Right,F8.0,F8.0 +num2,2,Unknown,Input,8,Right,F8.0,F8.0 +num3,3,Unknown,Input,8,Right,F8.0,F8.0 +num4,4,Unknown,Input,8,Right,F8.0,F8.0 +num5,5,Unknown,Input,8,Right,F8.0,F8.0 str1,6,Nominal,Input,1,Left,A1,A1 str2,7,Nominal,Input,2,Left,A2,A2 str3,8,Nominal,Input,3,Left,A3,A3 @@ -940,13 +940,13 @@ DISPLAY DICTIONARY. AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -LongVariableName1,1,Scale,Input,8,Right,F8.0,F8.0 -LongVariableName2,2,Scale,Input,8,Right,F8.0,F8.0 -LongVariableName3,3,Scale,Input,8,Right,F8.0,F8.0 -LongVariableName4,4,Scale,Input,8,Right,F8.0,F8.0 -Coördinate_X,5,Scale,Input,8,Right,F8.0,F8.0 -Coördinate_Y,6,Scale,Input,8,Right,F8.0,F8.0 -Coördinate_Z,7,Scale,Input,8,Right,F8.0,F8.0 +LongVariableName1,1,Unknown,Input,8,Right,F8.0,F8.0 +LongVariableName2,2,Unknown,Input,8,Right,F8.0,F8.0 +LongVariableName3,3,Unknown,Input,8,Right,F8.0,F8.0 +LongVariableName4,4,Unknown,Input,8,Right,F8.0,F8.0 +Coördinate_X,5,Unknown,Input,8,Right,F8.0,F8.0 +Coördinate_Y,6,Unknown,Input,8,Right,F8.0,F8.0 +Coördinate_Z,7,Unknown,Input,8,Right,F8.0,F8.0 ]) done AT_CLEANUP @@ -1105,9 +1105,9 @@ DISPLAY DICTIONARY. 
AT_CHECK([cat pspp.csv], [0], [[Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -FirstVariable,1,Scale,Output,8,Right,F8.0,F8.0 -SécondVariable,2,Scale,Input,8,Right,F8.0,F8.0 -ThirdVariable,3,Scale,Input,8,Right,F8.0,F8.0 +FirstVariable,1,Unknown,Output,8,Right,F8.0,F8.0 +SécondVariable,2,Unknown,Input,8,Right,F8.0,F8.0 +ThirdVariable,3,Unknown,Input,8,Right,F8.0,F8.0 ]]) done AT_CLEANUP @@ -1169,13 +1169,13 @@ warning: `sys-file.sav': Invalid role for variable x. Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -i,1,Scale,Input,8,Right,F8.0,F8.0 -o,2,Scale,Output,8,Right,F8.0,F8.0 -b,3,Scale,Both,8,Right,F8.0,F8.0 -n,4,Scale,None,8,Right,F8.0,F8.0 -p,5,Scale,Partition,8,Right,F8.0,F8.0 -s,6,Scale,Split,8,Right,F8.0,F8.0 -x,7,Scale,Input,8,Right,F8.0,F8.0 +i,1,Unknown,Input,8,Right,F8.0,F8.0 +o,2,Unknown,Output,8,Right,F8.0,F8.0 +b,3,Unknown,Both,8,Right,F8.0,F8.0 +n,4,Unknown,None,8,Right,F8.0,F8.0 +p,5,Unknown,Partition,8,Right,F8.0,F8.0 +s,6,Unknown,Split,8,Right,F8.0,F8.0 +x,7,Unknown,Input,8,Right,F8.0,F8.0 ]) done AT_CLEANUP @@ -1228,8 +1228,8 @@ LIST. AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,Scale,Input,8,Right,F8.0,F8.0 -num2,2,Scale,Input,8,Right,F8.0,F8.0 +num1,1,Unknown,Input,8,Right,F8.0,F8.0 +num2,2,Unknown,Input,8,Right,F8.0,F8.0 str4,3,Nominal,Input,4,Left,A4,A4 str8,4,Nominal,Input,8,Left,A8,A8 str15,5,Nominal,Input,15,Left,A15,A15 @@ -1291,8 +1291,8 @@ LIST. 
AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,Scale,Input,8,Right,F8.0,F8.0 -num2,2,Scale,Input,8,Right,F8.0,F8.0 +num1,1,Unknown,Input,8,Right,F8.0,F8.0 +num2,2,Unknown,Input,8,Right,F8.0,F8.0 str4,3,Nominal,Input,4,Left,A4,A4 str8,4,Nominal,Input,8,Left,A8,A8 str15,5,Nominal,Input,15,Left,A15,A15 @@ -1358,8 +1358,8 @@ LIST. Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,Scale,Input,8,Right,F8.0,F8.0 -num2,2,Scale,Input,8,Right,F8.0,F8.0 +num1,1,Unknown,Input,8,Right,F8.0,F8.0 +num2,2,Unknown,Input,8,Right,F8.0,F8.0 str4,3,Nominal,Input,4,Left,A4,A4 str8,4,Nominal,Input,8,Left,A8,A8 str15,5,Nominal,Input,15,Left,A15,A15 @@ -1456,8 +1456,8 @@ LIST. AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,Scale,Input,8,Right,F8.0,F8.0 -num2,2,Scale,Input,8,Right,F8.0,F8.0 +num1,1,Unknown,Input,8,Right,F8.0,F8.0 +num2,2,Unknown,Input,8,Right,F8.0,F8.0 str4,3,Nominal,Input,4,Left,A4,A4 str8,4,Nominal,Input,8,Left,A8,A8 str15,5,Nominal,Input,15,Left,A15,A15 @@ -1725,8 +1725,8 @@ DISPLAY DICTIONARY. 
Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -var1,1,Scale,Input,8,Right,F8.0,F8.0 -var001,2,Scale,Input,8,Right,F8.0,F8.0 +var1,1,Unknown,Input,8,Right,F8.0,F8.0 +var001,2,Unknown,Input,8,Right,F8.0,F8.0 ]) done AT_CLEANUP @@ -1966,7 +1966,7 @@ warning: `sys-file.sav' near offset 0x257: Ignoring long string missing value re Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format,Missing Values -num1,1,Scale,Input,8,Right,F8.0,F8.0, +num1,1,Unknown,Input,8,Right,F8.0,F8.0, str1,2,Nominal,Input,9,Left,A9,A9, str2,3,Nominal,Input,10,Left,A10,A10,"""abcdefgh""; ""ijklmnop""; ""qrstuvwx""" str3,4,Nominal,Input,11,Left,A11,A11, @@ -2003,7 +2003,7 @@ DISPLAY DICTIONARY. Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,Scale,Input,8,Right,F8.0,F8.0 +num1,1,Unknown,Input,8,Right,F8.0,F8.0 str1,2,Nominal,Input,4,Left,A4,A4 ]) done @@ -2248,7 +2248,7 @@ warning: \`sys-file.sav' near offset 0xd8: Integer format indicated by system fi Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -num1,1,Scale,Input,8,Right,F8.0,F8.0 +num1,1,Unknown,Input,8,Right,F8.0,F8.0 ]) done AT_CLEANUP diff --git a/tests/data/sys-file.at b/tests/data/sys-file.at index 86487173a0..7b2e208444 100644 --- a/tests/data/sys-file.at +++ b/tests/data/sys-file.at @@ -470,7 +470,7 @@ jalapeño vicuña. 
Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format -àéîöçxyzabc,1,,Scale,Input,8,Right,F8.2,F8.2 +àéîöçxyzabc,1,,Nominal,Input,8,Right,F8.2,F8.2 roué,2,Provençal soupçon,Nominal,Input,9,Left,A9,A9 croûton,3,,Nominal,Input,32,Left,A1000,A1000 diff --git a/tests/language/data-io/get-data-spreadsheet.at b/tests/language/data-io/get-data-spreadsheet.at index 54486f02f0..cf380da1f3 100644 --- a/tests/language/data-io/get-data-spreadsheet.at +++ b/tests/language/data-io/get-data-spreadsheet.at @@ -403,6 +403,7 @@ AT_CHECK([cp $top_srcdir/tests/language/data-io/readnames.ods this.ods])dnl AT_DATA([readnames.sps],[dnl GET DATA /TYPE=ODS /FILE='this.ods' /CELLRANGE=RANGE 'A1:H8' /READNAMES=ON +EXECUTE. DISPLAY DICTIONARY. LIST. ]) @@ -411,13 +412,13 @@ LIST. AT_CHECK([pspp -O format=csv readnames.sps], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -freda,1,Scale,Input,8,Right,F8.2,F8.2 -fred,2,Scale,Input,8,Right,F8.2,F8.2 -fred_A,3,Scale,Input,8,Right,F8.2,F8.2 -fred_B,4,Scale,Input,8,Right,F8.2,F8.2 -fred_C,5,Scale,Input,8,Right,F8.2,F8.2 -fred_D,6,Scale,Input,8,Right,F8.2,F8.2 -fred_E,7,Scale,Input,8,Right,F8.2,F8.2 +freda,1,Nominal,Input,8,Right,F8.2,F8.2 +fred,2,Nominal,Input,8,Right,F8.2,F8.2 +fred_A,3,Nominal,Input,8,Right,F8.2,F8.2 +fred_B,4,Nominal,Input,8,Right,F8.2,F8.2 +fred_C,5,Nominal,Input,8,Right,F8.2,F8.2 +fred_D,6,Nominal,Input,8,Right,F8.2,F8.2 +fred_E,7,Nominal,Input,8,Right,F8.2,F8.2 Table: Data List freda,fred,fred_A,fred_B,fred_C,fred_D,fred_E diff --git a/tests/language/data-io/matrix-data.at b/tests/language/data-io/matrix-data.at index ca965dd7a2..50017c3389 100644 --- a/tests/language/data-io/matrix-data.at +++ b/tests/language/data-io/matrix-data.at @@ -254,8 +254,8 @@ list. 
AT_CHECK([pspp -O format=csv matrix-data.sps], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -s1,1,Scale,Input,8,Right,F4.0,F4.0 -s2,2,Scale,Input,8,Right,F4.0,F4.0 +s1,1,Nominal,Input,8,Right,F4.0,F4.0 +s2,2,Nominal,Input,8,Right,F4.0,F4.0 ROWTYPE_,3,Nominal,Input,8,Left,A8,A8 VARNAME_,4,Nominal,Input,8,Left,A8,A8 var01,5,Scale,Input,8,Right,F10.4,F10.4 @@ -418,9 +418,9 @@ list. AT_CHECK([pspp matrix-data.sps -O format=csv], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -s,1,Scale,Input,8,Right,F4.0,F4.0 +s,1,Nominal,Input,8,Right,F4.0,F4.0 ROWTYPE_,2,Nominal,Input,8,Left,A8,A8 -f,3,Scale,Input,8,Right,F4.0,F4.0 +f,3,Nominal,Input,8,Right,F4.0,F4.0 VARNAME_,4,Nominal,Input,8,Left,A8,A8 var01,5,Scale,Input,8,Right,F10.4,F10.4 var02,6,Scale,Input,8,Right,F10.4,F10.4 diff --git a/tests/language/dictionary/apply.at b/tests/language/dictionary/apply.at index 97f622fa6b..eb7baafe9f 100644 --- a/tests/language/dictionary/apply.at +++ b/tests/language/dictionary/apply.at @@ -36,15 +36,15 @@ display dictionary. AT_CHECK([pspp -O format=csv apply-dict.sps], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -foo,1,Scale,Input,8,Right,F8.2,F8.2 -bar,2,Scale,Input,8,Right,F8.2,F8.2 +foo,1,Nominal,Input,8,Right,F8.2,F8.2 +bar,2,Nominal,Input,8,Right,F8.2,F8.2 "apply-dict.sps:12: warning: APPLY DICTIONARY: Variable bar is numeric in target file, but string in source file." 
Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format -foo,1,This is a label,Scale,Input,8,Right,TIME22.0,TIME22.0 -bar,2,,Scale,Input,8,Right,F8.2,F8.2 +foo,1,This is a label,Nominal,Input,8,Right,TIME22.0,TIME22.0 +bar,2,,Nominal,Input,8,Right,F8.2,F8.2 ]) AT_CLEANUP diff --git a/tests/language/dictionary/sys-file-info.at b/tests/language/dictionary/sys-file-info.at index 6b80f1e5be..4f53b4e0ff 100644 --- a/tests/language/dictionary/sys-file-info.at +++ b/tests/language/dictionary/sys-file-info.at @@ -55,7 +55,7 @@ Documents,"DOCUMENT A document. Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -x,1,Scale,Input,8,Right,F8.2,F8.2 +x,1,Nominal,Input,8,Right,F8.2,F8.2 name,2,Nominal,Input,10,Left,A10,A10 ]) AT_CLEANUP diff --git a/tests/language/dictionary/value-labels.at b/tests/language/dictionary/value-labels.at index 1873d3779b..c09b8ef080 100644 --- a/tests/language/dictionary/value-labels.at +++ b/tests/language/dictionary/value-labels.at @@ -28,8 +28,8 @@ DISPLAY DICTIONARY. 
AT_CHECK([pspp -O format=csv value-labels.sps], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -ad,1,Scale,Input,8,Right,ADATE10,ADATE10 -dt,2,Scale,Input,8,Right,DATETIME20.0,DATETIME20.0 +ad,1,Unknown,Input,8,Right,ADATE10,ADATE10 +dt,2,Unknown,Input,8,Right,DATETIME20.0,DATETIME20.0 Table: Value Labels Variable Value,,Label @@ -56,7 +56,7 @@ AT_CHECK([pspp -o pspp.csv -o pspp.txt value-labels.sps]) AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -x,1,Scale,Input,8,Right,F8.2,F8.2 +x,1,Nominal,Input,8,Right,F8.2,F8.2 Table: Value Labels Variable Value,,Label @@ -95,7 +95,7 @@ AT_CHECK([pspp -o pspp.csv -o pspp.txt get.sps]) AT_CHECK([cat pspp.csv], [0], [dnl Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format -x,1,Scale,Input,8,Right,F8.2,F8.2 +x,1,Nominal,Input,8,Right,F8.2,F8.2 Table: Value Labels Variable Value,,Label diff --git a/tests/language/dictionary/variable-display.at b/tests/language/dictionary/variable-display.at index fae03a50cb..5aeac60c4b 100644 --- a/tests/language/dictionary/variable-display.at +++ b/tests/language/dictionary/variable-display.at @@ -35,6 +35,99 @@ z,3,Nominal,None,14,Center,F8.2,F8.2 ]) AT_CLEANUP +AT_SETUP([variable level inference and SCALEMIN]) +AT_DATA([var-level.sps], [dnl +DATA LIST LIST NOTABLE /n1 to n3 s1 to s5. + +* Nominal formats (copied from data that will default to scale). +COMPUTE n4=s1. +COMPUTE n5=s1. +FORMATS n4(WKDAY5) n5(MONTH5). + +* Scale formats (copied from data that will default to nominal). +COMPUTE s6=n1. +COMPUTE s7=n1. +COMPUTE s8=n1. +FORMATS s6(DOLLAR6.2) s7(CCA8.2) s8(DATETIME17). + +STRING string(A8). +DISPLAY DICTIONARY. +EXECUTE. + +* n1 has 10 unique small values -> nominal. +* n2 has 23 unique small values -> nominal. +* n3 is all missing -> nominal. +* s1 has 24 unique small values -> scale. 
+* s2 has one negative value -> scale. +* s3 has one non-integer value -> scale. +* s4 has no valid values less than 10 -> scale. +* s5 has no valid values less than 10,000 -> scale. +BEGIN DATA. +1 1 . 1 1 1 10 10001 +2 2 . 2 2 2 11 10002 +3 3 . 3 3 3 12 10003 +4 4 . 4 4 4 13 10004 +5 5 . 5 5 5 14 10005 +6 6 . 6 6 6 15 10006 +7 7 . 7 7 7 16 10007 +8 8 . 8 8 8 17 10008 +9 9 . 9 9 9 18 10009 +10 10 . 10 10 10.5 19 110000 +1 11 . 11 -1 1 11 10001 +2 12 . 12 2 2 12 10002 +3 13 . 13 3 3 13 10003 +4 14 . 14 4 4 14 10004 +5 15 . 15 5 5 15 10005 +6 16 . 16 6 6 16 10006 +7 17 . 17 7 7 17 10007 +8 18 . 18 8 8 18 10008 +9 19 . 19 9 9 19 10009 +1 20 . 20 1 1 20 10001 +2 21 . 21 2 2 21 10002 +3 22 . 22 3 3 22 10003 +4 23 . 23 4 4 23 10004 +5 23 . 24 5 5 24 10005 +6 23 . 24 6 6 25 10006 +END DATA. +DISPLAY DICTIONARY. +]) +AT_CHECK([pspp -o pspp.csv var-level.sps]) +AT_CHECK([cat pspp.csv], [0], [dnl +Table: Variables +Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format +n1,1,Unknown,Input,8,Right,F8.2,F8.2 +n2,2,Unknown,Input,8,Right,F8.2,F8.2 +n3,3,Unknown,Input,8,Right,F8.2,F8.2 +s1,4,Unknown,Input,8,Right,F8.2,F8.2 +s2,5,Unknown,Input,8,Right,F8.2,F8.2 +s3,6,Unknown,Input,8,Right,F8.2,F8.2 +s4,7,Unknown,Input,8,Right,F8.2,F8.2 +s5,8,Unknown,Input,8,Right,F8.2,F8.2 +n4,9,Unknown,Input,8,Right,WKDAY5,WKDAY5 +n5,10,Unknown,Input,8,Right,MONTH5,MONTH5 +s6,11,Unknown,Input,8,Right,DOLLAR6.2,DOLLAR6.2 +s7,12,Unknown,Input,8,Right,CCA8.2,CCA8.2 +s8,13,Unknown,Input,8,Right,DATETIME17.0,DATETIME17.0 +string,14,Nominal,Input,8,Left,A8,A8 + +Table: Variables +Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format +n1,1,Nominal,Input,8,Right,F8.2,F8.2 +n2,2,Nominal,Input,8,Right,F8.2,F8.2 +n3,3,Nominal,Input,8,Right,F8.2,F8.2 +s1,4,Scale,Input,8,Right,F8.2,F8.2 +s2,5,Scale,Input,8,Right,F8.2,F8.2 +s3,6,Scale,Input,8,Right,F8.2,F8.2 +s4,7,Scale,Input,8,Right,F8.2,F8.2 +s5,8,Scale,Input,8,Right,F8.2,F8.2 
+n4,9,Nominal,Input,8,Right,WKDAY5,WKDAY5 +n5,10,Nominal,Input,8,Right,MONTH5,MONTH5 +s6,11,Scale,Input,8,Right,DOLLAR6.2,DOLLAR6.2 +s7,12,Scale,Input,8,Right,CCA8.2,CCA8.2 +s8,13,Scale,Input,8,Right,DATETIME17.0,DATETIME17.0 +string,14,Nominal,Input,8,Left,A8,A8 +]) +AT_CLEANUP AT_BANNER([VARIABLE LABELS]) diff --git a/tests/language/stats/autorecode.at b/tests/language/stats/autorecode.at index 78c8aabfd2..f639b078a4 100644 --- a/tests/language/stats/autorecode.at +++ b/tests/language/stats/autorecode.at @@ -228,7 +228,7 @@ oojimiflips,7,2 Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format -new,3,tracking my stuff,Scale,Input,8,Right,F1.0,F1.0 +new,3,tracking my stuff,Nominal,Input,8,Right,F1.0,F1.0 Table: Value Labels Variable Value,,Label diff --git a/tests/language/stats/rank.at b/tests/language/stats/rank.at index e314071338..bd3f8becb4 100644 --- a/tests/language/stats/rank.at +++ b/tests/language/stats/rank.at @@ -122,13 +122,13 @@ b,Nb,N Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format -a,1,,Scale,Input,8,Right,F8.2,F8.2 -b,2,,Scale,Input,8,Right,F8.2,F8.2 -Ra,3,RANK of a,Scale,Input,8,Right,F9.3,F9.3 -RFR001,4,RFRACTION of a,Scale,Input,8,Right,F6.4,F6.4 +a,1,,Nominal,Input,8,Right,F8.2,F8.2 +b,2,,Nominal,Input,8,Right,F8.2,F8.2 +Ra,3,RANK of a,Ordinal,Input,8,Right,F9.3,F9.3 +RFR001,4,RFRACTION of a,Ordinal,Input,8,Right,F6.4,F6.4 count,5,N of a,Scale,Input,8,Right,F6.0,F6.0 -Rb,6,RANK of b,Scale,Input,8,Right,F9.3,F9.3 -RFR002,7,RFRACTION of b,Scale,Input,8,Right,F6.4,F6.4 +Rb,6,RANK of b,Ordinal,Input,8,Right,F9.3,F9.3 +RFR002,7,RFRACTION of b,Ordinal,Input,8,Right,F6.4,F6.4 Nb,8,N of b,Scale,Input,8,Right,F6.0,F6.0 Table: Data List @@ -477,10 +477,10 @@ foo,RAN001,RANK Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format -foo,1,,Scale,Input,8,Right,F8.2,F8.2 -rfoo,2,,Scale,Input,8,Right,F8.2,F8.2 
-ran003,3,,Scale,Input,8,Right,F8.2,F8.2 -RAN001,4,RANK of foo,Scale,Input,8,Right,F9.3,F9.3 +foo,1,,Nominal,Input,8,Right,F8.2,F8.2 +rfoo,2,,Nominal,Input,8,Right,F8.2,F8.2 +ran003,3,,Nominal,Input,8,Right,F8.2,F8.2 +RAN001,4,RANK of foo,Ordinal,Input,8,Right,F9.3,F9.3 ]) AT_CLEANUP diff --git a/tests/perl-module.at b/tests/perl-module.at index 58181c5f98..b40fc26b02 100644 --- a/tests/perl-module.at +++ b/tests/perl-module.at @@ -469,7 +469,7 @@ AT_CHECK([pspp -O format=csv dump-dicts.sps], [0], Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format,Missing Values string,1,A Short String Variable,Nominal,Input,8,Left,A8,A8,"""3333 """ longstring,2,A Long String Variable,Nominal,Input,12,Left,A12,A12, -numeric,3,A Numeric Variable,Scale,Input,8,Right,F10.0,F10.0,9; 5; 999 +numeric,3,A Numeric Variable,Nominal,Input,8,Right,F10.0,F10.0,9; 5; 999 date,4,A Date Variable,Scale,Input,8,Right,DATE11,DATE11, dollar,5,A Dollar Variable,Scale,Input,8,Right,DOLLAR11.2,DOLLAR11.2, datetime,6,A Datetime Variable,Scale,Input,8,Right,DATETIME17.0,DATETIME17.0, @@ -504,7 +504,7 @@ Table: Variables Name,Position,Label,Measurement Level,Role,Width,Alignment,Print Format,Write Format,Missing Values string,1,A Short String Variable,Nominal,Input,8,Left,A8,A8,"""3333 """ longstring,2,A Long String Variable,Nominal,Input,12,Left,A12,A12, -numeric,3,A Numeric Variable,Scale,Input,8,Right,F10.0,F10.0,9; 5; 999 +numeric,3,A Numeric Variable,Nominal,Input,8,Right,F10.0,F10.0,9; 5; 999 date,4,A Date Variable,Scale,Input,8,Right,DATE11,DATE11, dollar,5,A Dollar Variable,Scale,Input,8,Right,DOLLAR11.2,DOLLAR11.2, datetime,6,A Datetime Variable,Scale,Input,8,Right,DATETIME17.0,DATETIME17.0, -- 2.30.2