From 283e2b1c7e9223e0d367aa46f2ecfdc66a727712 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 29 Apr 2023 14:36:56 -0700 Subject: [PATCH] Be more careful about checking variable names. --- src/data/dictionary.c | 95 ++++++++++++++++------------ src/data/dictionary.h | 12 ++-- src/data/gnumeric-reader.c | 15 ++--- src/data/identifier.h | 6 +- src/data/identifier2.c | 66 ++++++++++++++++--- src/data/mrset.c | 11 +--- src/data/ods-reader.c | 14 ++-- src/data/pc+-file-reader.c | 8 +-- src/data/por-file-reader.c | 23 ++----- src/data/psql-reader.c | 9 +-- src/data/sys-file-reader.c | 52 ++++----------- src/language/commands/attributes.c | 3 +- src/language/commands/get-data.c | 2 +- src/language/commands/matrix.c | 2 +- src/language/commands/vector.c | 6 +- src/language/lexer/variable-parser.c | 18 +++--- src/language/lexer/variable-parser.h | 4 +- src/ui/gui/psppire-dict.c | 6 +- src/ui/gui/psppire-import-textfile.c | 8 +-- tests/data/dictionary.at | 5 +- tests/data/pc+-file-reader.at | 6 +- tests/data/sys-file-reader.at | 8 +-- tests/language/commands/mrsets.at | 2 +- 23 files changed, 191 insertions(+), 190 deletions(-) diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 5c8d6e8dc3..10768ce921 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -81,6 +81,10 @@ struct dictionary struct varset **varsets; /* Variable sets. */ size_t n_varsets; /* Number of variable sets. */ + /* Number of VAR### names created by dict_make_unique_var_name(), or + less. */ + unsigned long int n_unique_names; + /* Whether variable names must be valid identifiers. Normally, this is true, but sometimes a dictionary is prepared for external use (e.g. output to a CSV file) where names don't have to be valid. */ @@ -176,14 +180,15 @@ dict_get_encoding (const struct dictionary *d) } /* Checks whether UTF-8 string ID is an acceptable identifier in DICT's - encoding. Returns true if it is, otherwise an error message that the caller - must free(). */ + encoding for a variable in the classes in CLASSES. Returns true if it is, + otherwise an error message that the caller must free(). */ char * WARN_UNUSED_RESULT -dict_id_is_valid__ (const struct dictionary *dict, const char *id) +dict_id_is_valid__ (const struct dictionary *dict, const char *id, + enum dict_class classes) { if (!dict->names_must_be_ids) return NULL; - return id_is_valid__ (id, dict->encoding); + return id_is_valid__ (id, dict->encoding, classes); } static bool @@ -199,11 +204,12 @@ error_to_bool (char *error) } /* Returns true if UTF-8 string ID is an acceptable identifier in DICT's - encoding, false otherwise. */ + encoding for a variable in the classes in CLASSES, false otherwise. */ bool -dict_id_is_valid (const struct dictionary *dict, const char *id) +dict_id_is_valid (const struct dictionary *dict, const char *id, + enum dict_class classes) { - return error_to_bool (dict_id_is_valid__ (dict, id)); + return error_to_bool (dict_id_is_valid__ (dict, id, classes)); } void @@ -460,6 +466,8 @@ dict_clear_split_vars (struct dictionary *d) static void dict_delete_var__ (struct dictionary *d, struct variable *v, bool skip_callbacks) { + d->n_unique_names = 0; + int dict_index = var_get_dict_index (v); assert (dict_contains_var (d, v)); @@ -621,9 +629,7 @@ dict_clear__ (struct dictionary *d, bool skip_callbacks) /* FIXME? Should we really clear case_limit, label, documents? Others are necessarily cleared by deleting all the variables.*/ while (d->n_vars > 0) - { - dict_delete_var__ (d, d->vars[d->n_vars - 1].var, skip_callbacks); - } + dict_delete_var__ (d, d->vars[d->n_vars - 1].var, skip_callbacks); free (d->vars); d->vars = NULL; @@ -861,6 +867,25 @@ dict_clone_var_as_assert (struct dictionary *d, const struct variable *old_var, return add_var (d, new_var); } +/* Creates and returns a new variable in DICT with the given WIDTH. Uses HINT + as the variable name, if it is nonnull, not already in use in DICT, and a + valid name for a DC_ORDINARY variable; otherwise, chooses a unique name with + HINT as a hint. */ +struct variable * +dict_create_var_with_unique_name (struct dictionary *dict, const char *hint, + int width) +{ + const char *name = (hint + && dict_id_is_valid (dict, hint, DC_ORDINARY) + && !dict_lookup_var (dict, hint) + ? hint + : dict_make_unique_var_name (dict, hint)); + struct variable *var = dict_create_var_assert (dict, name, width); + if (name != hint) + free (CONST_CAST (char *, name)); + return var; +} + /* Returns the variable named NAME in D, or a null pointer if no variable has that name. */ struct variable * @@ -960,11 +985,13 @@ dict_reorder_vars (struct dictionary *d, reindex_vars (d, 0, d->n_vars, false); } -/* Changes the name of variable V that is currently in a dictionary to +/* Changes the name of variable V that is currently in dictionary D to NEW_NAME. */ static void -rename_var (struct variable *v, const char *new_name) +rename_var (struct dictionary *d, struct variable *v, const char *new_name) { + d->n_unique_names = 0; + struct vardict_info *vardict = var_get_vardict (v); var_clear_vardict (v); var_set_name (v, new_name); @@ -985,7 +1012,7 @@ dict_try_rename_var (struct dictionary *d, struct variable *v, struct variable *old = var_clone (v); unindex_var (d, var_get_vardict (v)); - rename_var (v, new_name); + rename_var (d, v, new_name); reindex_var (d, var_get_vardict (v), false); if (settings_get_algorithm () == ENHANCED) @@ -1040,7 +1067,7 @@ dict_rename_vars (struct dictionary *d, for (i = 0; i < count; i++) { unindex_var (d, var_get_vardict (vars[i])); - rename_var (vars[i], new_names[i]); + rename_var (d, vars[i], new_names[i]); } /* Add the renamed variables back into the name hash, @@ -1061,7 +1088,7 @@ dict_rename_vars (struct dictionary *d, for (i = 0; i < count; i++) { - rename_var (vars[i], old_names[i]); + rename_var (d, vars[i], old_names[i]); reindex_var (d, var_get_vardict (vars[i]), false); } @@ -1116,7 +1143,7 @@ make_hinted_name (const struct dictionary *dict, const char *hint) mblen = u8_mbtouc (&uc, CHAR_CAST (const uint8_t *, hint + ofs), hint_len - ofs); if (rp == root - ? lex_uc_is_id1 (uc) && uc != '$' + ? lex_uc_is_id1 (uc) && uc != '$' && uc != '#' && uc != '@' : lex_uc_is_idn (uc)) { if (dropped) @@ -1163,47 +1190,33 @@ make_hinted_name (const struct dictionary *dict, const char *hint) } static char * -make_numeric_name (const struct dictionary *dict, unsigned long int *num_start) +make_numeric_name (struct dictionary *dict) { - unsigned long int number; - - for (number = num_start != NULL ? MAX (*num_start, 1) : 1; - number < ULONG_MAX; - number++) + while (dict->n_unique_names++ < ULONG_MAX) { - char name[3 + INT_STRLEN_BOUND (number) + 1]; - - sprintf (name, "VAR%03lu", number); + char *name = xasprintf ("VAR%03lu", dict->n_unique_names); if (dict_lookup_var (dict, name) == NULL) - { - if (num_start != NULL) - *num_start = number + 1; - return xstrdup (name); - } + return name; + free (name); } NOT_REACHED (); } - /* Devises and returns a variable name unique within DICT. The variable name is owned by the caller, which must free it with free() when it is no longer - needed. + needed. The variable name will not begin with '$' or '#' or '@'. HINT, if it is non-null, is used as a suggestion that will be modified for suitability as a variable name and for uniqueness. - If HINT is null or entirely unsuitable, a name in the form - "VAR%03d" will be generated, where the smallest unused integer - value is used. If NUM_START is non-null, then its value is - used as the minimum numeric value to check, and it is updated - to the next value to be checked. -*/ + If HINT is null or entirely unsuitable, uses a name in the form "VAR%03d", + using the smallest available integer. */ char * -dict_make_unique_var_name (const struct dictionary *dict, const char *hint, - unsigned long int *num_start) +dict_make_unique_var_name (const struct dictionary *dict_, const char *hint) { + struct dictionary *dict = CONST_CAST (struct dictionary *, dict_); if (hint != NULL) { char *hinted_name = make_hinted_name (dict, hint); @@ -1211,7 +1224,7 @@ dict_make_unique_var_name (const struct dictionary *dict, const char *hint, return hinted_name; } - return make_numeric_name (dict, num_start); + return make_numeric_name (dict); } /* Returns whether variable names must be valid identifiers. Normally, this is diff --git a/src/data/dictionary.h b/src/data/dictionary.h index efb587cf75..284d71ca95 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -64,6 +64,9 @@ struct variable *dict_clone_var_as (struct dictionary *, struct variable *dict_clone_var_as_assert (struct dictionary *, const struct variable *, const char *); +struct variable *dict_create_var_with_unique_name (struct dictionary *, + const char *hint, + int width); /* Deleting variables. */ void dict_delete_var (struct dictionary *, struct variable *); @@ -86,8 +89,7 @@ void dict_rename_var (struct dictionary *, struct variable *, const char *); bool dict_rename_vars (struct dictionary *, struct variable **, char **new_names, size_t count, char **err_name); -char *dict_make_unique_var_name (const struct dictionary *, const char *hint, - unsigned long int *num_start); +char *dict_make_unique_var_name (const struct dictionary *, const char *hint); bool dict_get_names_must_be_ids (const struct dictionary *); void dict_set_names_must_be_ids (struct dictionary *, bool); @@ -192,9 +194,11 @@ bool dict_has_attributes (const struct dictionary *); /* Data encoding. */ const char *dict_get_encoding (const struct dictionary *d); -char *dict_id_is_valid__ (const struct dictionary *, const char *id) +char *dict_id_is_valid__ (const struct dictionary *, const char *id, + enum dict_class) WARN_UNUSED_RESULT; -bool dict_id_is_valid (const struct dictionary *, const char *id); +bool dict_id_is_valid (const struct dictionary *, const char *id, + enum dict_class); /* Functions to be called upon dictionary changes. */ struct dict_callbacks diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index a7f2d04aab..87d9db6a69 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -684,7 +684,6 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, int type = 0; int x = 0; struct gnumeric_reader *r = NULL; - unsigned long int vstart = 0; int ret; casenumber n_cases = CASENUMBER_MAX; int i; @@ -853,19 +852,17 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, for (i = 0 ; i < n_var_specs ; ++i) { - char *name; - - if ((var_spec[i].name == NULL) && (var_spec[i].first_value == NULL)) + struct var_spec *vs = &var_spec[i]; + if (vs->name == NULL && !vs->first_value) continue; /* Probably no data exists for this variable, so allocate a default width */ - if (var_spec[i].width == -1) - var_spec[i].width = SPREADSHEET_DEFAULT_WIDTH; + if (vs->width == -1) + vs->width = SPREADSHEET_DEFAULT_WIDTH; - name = dict_make_unique_var_name (r->spreadsheet.dict, var_spec[i].name, &vstart); - dict_create_var (r->spreadsheet.dict, name, var_spec[i].width); - free (name); + dict_create_var_with_unique_name (r->spreadsheet.dict, vs->name, + vs->width); } /* Create the first case, and cache it */ diff --git a/src/data/identifier.h b/src/data/identifier.h index 4a23dd7dd5..8fc25cccba 100644 --- a/src/data/identifier.h +++ b/src/data/identifier.h @@ -20,6 +20,7 @@ #include #include #include +#include "data/dict-class.h" #include "libpspp/str.h" #include "gl/verify.h" @@ -89,9 +90,10 @@ bool lex_is_keyword (enum token_type); /* Validating identifiers. */ #define ID_MAX_LEN 64 /* Maximum length of identifier, in bytes. */ -bool id_is_valid (const char *id, const char *dict_encoding); +bool id_is_valid (const char *id, const char *dict_encoding, enum dict_class); bool id_is_plausible (const char *id); -char *id_is_valid__ (const char *id, const char *dict_encoding) +char *id_is_valid__ (const char *id, const char *dict_encoding, + enum dict_class) WARN_UNUSED_RESULT; char *id_is_plausible__ (const char *id) WARN_UNUSED_RESULT; diff --git a/src/data/identifier2.c b/src/data/identifier2.c index d21c6bba5b..53f98ac6d4 100644 --- a/src/data/identifier2.c +++ b/src/data/identifier2.c @@ -25,6 +25,7 @@ #include #include +#include "libpspp/assertion.h" #include "libpspp/cast.h" #include "libpspp/i18n.h" #include "libpspp/message.h" @@ -47,11 +48,15 @@ error_to_bool (char *error) } /* Checks whether if UTF-8 string ID is an acceptable identifier in encoding - DICT_ENCODING (UTF-8 if null). Returns NULL if it is acceptable, otherwise - an error message that the caller must free(). */ + DICT_ENCODING (UTF-8 if null) for a variable in one of the classes in + CLASSES. Returns NULL if it is acceptable, otherwise an error message that + the caller must free(). */ char * WARN_UNUSED_RESULT -id_is_valid__ (const char *id, const char *dict_encoding) +id_is_valid__ (const char *id, const char *dict_encoding, + enum dict_class classes) { + assert (classes && !(classes & ~DC_ALL)); + char *error = id_is_plausible__ (id); if (error) return error; @@ -59,13 +64,55 @@ id_is_valid__ (const char *id, const char *dict_encoding) size_t dict_len; if (dict_encoding != NULL) { - /* XXX need to reject recoded strings that contain the fallback - character. */ - dict_len = recode_string_len (dict_encoding, "UTF-8", id, -1); + struct substring out; + bool ok = !recode_pedantically (dict_encoding, "UTF-8", ss_cstr (id), + NULL, &out); + dict_len = ss_length (out); + ss_dealloc (&out); + if (!ok) + return xasprintf (_("Identifier `%s' is not valid in encoding `%s'." + "used for this dictionary."), id, dict_encoding); } else dict_len = strlen (id); + enum dict_class c = dict_class_from_id (id); + if (!(classes & c)) + { + switch (c) + { + case DC_ORDINARY: + switch ((int) classes) + { + case DC_SYSTEM: + return xasprintf (_("`%s' is not valid here because this " + "identifier must start with `$'."), id); + + case DC_SCRATCH: + return xasprintf (_("`%s' is not valid here because this " + "identifier must start with `#'."), id); + + case DC_SYSTEM | DC_SCRATCH: + return xasprintf (_("`%s' is not valid here because this " + "identifier must start with `$' or `#'."), + id); + + case DC_ORDINARY: + default: + NOT_REACHED (); + } + NOT_REACHED (); + + case DC_SYSTEM: + return xasprintf (_("`%s' and other identifiers starting with `$' " + "are not valid here."), id); + + case DC_SCRATCH: + return xasprintf (_("`%s' and other identifiers starting with `#' " + "are not valid here."), id); + } + } + if (dict_len > ID_MAX_LEN) return xasprintf (_("Identifier `%s' exceeds %d-byte limit."), id, ID_MAX_LEN); @@ -74,11 +121,12 @@ id_is_valid__ (const char *id, const char *dict_encoding) } /* Returns true if UTF-8 string ID is an acceptable identifier in encoding - DICT_ENCODING (UTF-8 if null), false otherwise. */ + DICT_ENCODING (UTF-8 if null) for variable in one of the classes in CLASSES, + false otherwise. */ bool -id_is_valid (const char *id, const char *dict_encoding) +id_is_valid (const char *id, const char *dict_encoding, enum dict_class classes) { - return error_to_bool (id_is_valid__ (id, dict_encoding)); + return error_to_bool (id_is_valid__ (id, dict_encoding, classes)); } /* Checks whether UTF-8 string ID is an plausible identifier. Returns NULL if diff --git a/src/data/mrset.c b/src/data/mrset.c index c5e6cd2ec2..c5e00439d5 100644 --- a/src/data/mrset.c +++ b/src/data/mrset.c @@ -73,16 +73,7 @@ mrset_destroy (struct mrset *mrset) char * WARN_UNUSED_RESULT mrset_is_valid_name__ (const char *name, const char *dict_encoding) { - char *error = id_is_valid__ (name, dict_encoding); - if (error) - return error; - - if (name[0] != '$') - return xasprintf (_("%s is not a valid name for a multiple response " - "set. Multiple response set names must begin with " - "`$'."), name); - - return NULL; + return id_is_valid__ (name, dict_encoding, DC_SYSTEM); } static bool diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c index c952723645..f05d682729 100644 --- a/src/data/ods-reader.c +++ b/src/data/ods-reader.c @@ -771,7 +771,6 @@ ods_make_reader (struct spreadsheet *spreadsheet, { intf ret = 0; xmlChar *type = NULL; - unsigned long int vstart = 0; casenumber n_cases = CASENUMBER_MAX; int i; struct var_spec *var_spec = NULL; @@ -936,15 +935,11 @@ ods_make_reader (struct spreadsheet *spreadsheet, for (i = 0; i < n_var_specs ; ++i) { - struct fmt_spec fmt; - struct variable *var = NULL; - char *name = dict_make_unique_var_name (r->spreadsheet.dict, var_spec[i].name, &vstart); - int width = xmv_to_width (&var_spec[i].firstval, opts->asw); - dict_create_var (r->spreadsheet.dict, name, width); - free (name); - - var = dict_get_var (r->spreadsheet.dict, i); + int width = xmv_to_width (&var_spec[i].firstval, opts->asw); + struct variable *var = dict_create_var_with_unique_name ( + r->spreadsheet.dict, var_spec[i].name, width); + struct fmt_spec fmt; if (0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date"))) { fmt.type = FMT_DATE; @@ -953,7 +948,6 @@ ods_make_reader (struct spreadsheet *spreadsheet, } else fmt = fmt_default_for_width (width); - var_set_both_formats (var, fmt); } diff --git a/src/data/pc+-file-reader.c b/src/data/pc+-file-reader.c index 73b9ea804b..8c35724138 100644 --- a/src/data/pc+-file-reader.c +++ b/src/data/pc+-file-reader.c @@ -843,7 +843,7 @@ parse_variable_records (struct pcp_reader *r, struct dictionary *dict, continue; } - if (!dict_id_is_valid (dict, name) || name[0] == '#') + if (!dict_id_is_valid (dict, name, DC_ORDINARY)) { pcp_error (r, rec->pos, _("Invalid variable name `%s'."), name); return false; @@ -852,12 +852,10 @@ parse_variable_records (struct pcp_reader *r, struct dictionary *dict, struct variable *var = dict_create_var (dict, name, rec->width); if (var == NULL) { - char *new_name = dict_make_unique_var_name (dict, NULL, NULL); + var = dict_create_var_with_unique_name (dict, name, rec->width); pcp_warn (r, rec->pos, _("Renaming variable with duplicate name " "`%s' to `%s'."), - name, new_name); - var = dict_create_var_assert (dict, new_name, rec->width); - free (new_name); + name, var_get_name (var)); } if (rec->weight) { diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index 2487ab2cb4..50c4495ff8 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -710,28 +710,13 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) for (j = 0; j < 6; j++) fmt[j] = read_int (r); - if (!dict_id_is_valid (dict, name) || *name == '#' || *name == '$') - error (r, _("Invalid variable name `%s' in position %d."), name, i); - str_uppercase (name); - if (width < 0 || width > 255) error (r, _("Bad width %d for variable %s."), width, name); - v = dict_create_var (dict, name, width); - if (v == NULL) - { - unsigned long int i; - for (i = 1; ; i++) - { - char *try_name = xasprintf ("%s_%lu", name, i); - v = dict_create_var (dict, try_name, width); - free (try_name); - if (v != NULL) - break; - } - warning (r, _("Duplicate variable name %s in position %d renamed " - "to %s."), name, i, var_get_name (v)); - } + v = dict_create_var_with_unique_name (dict, name, width); + if (utf8_strcasecmp (name, var_get_name (v))) + warning (r, _("Invalid or duplicate variable name %s in position %d " + "renamed to %s."), name, i, var_get_name (v)); print = convert_format (r, &fmt[0], v, &report_error); write = convert_format (r, &fmt[3], v, &report_error); diff --git a/src/data/psql-reader.c b/src/data/psql-reader.c index 8b83d018de..63d9cad3d1 100644 --- a/src/data/psql-reader.c +++ b/src/data/psql-reader.c @@ -180,13 +180,8 @@ static struct variable * create_var (struct psql_reader *r, struct fmt_spec fmt, int width, const char *suggested_name, int col) { - unsigned long int vx = 0; - struct variable *var; - char *name; - - name = dict_make_unique_var_name (r->dict, suggested_name, &vx); - var = dict_create_var (r->dict, name, width); - free (name); + struct variable *var + = dict_create_var_with_unique_name (r->dict, suggested_name, width); var_set_both_formats (var, fmt); diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 491df7f996..b0f60a00c2 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1400,15 +1400,6 @@ parse_header (struct sfm_reader *r, const struct sfm_header_record *header, info->product = ss_xstrdup (product); } -static struct variable * -add_var_with_generated_name (struct dictionary *dict, int width) -{ - char *name = dict_make_unique_var_name (dict, NULL, NULL); - struct variable *var = dict_create_var_assert (dict, name, width); - free (name); - return var; -} - /* Reads a variable (type 2) record from R and adds the corresponding variable to DICT. Also skips past additional variable records for long string @@ -1423,12 +1414,10 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, for (rec = var_recs; rec < &var_recs[n_var_recs];) { - size_t n_values; - char *name; - size_t i; + char *name = recode_string_pool ("UTF-8", dict_encoding, + rec->name, -1, r->pool); - name = recode_string_pool ("UTF-8", dict_encoding, - rec->name, -1, r->pool); + /* Names are right-padded with spaces. */ name[strcspn (name, " ")] = '\0'; if (rec->width < 0 || rec->width > 255) @@ -1438,25 +1427,13 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, return false; } - struct variable *var; - if (!dict_id_is_valid (dict, name) || name[0] == '$' || name[0] == '#') - { - var = add_var_with_generated_name (dict, rec->width); - sys_warn (r, rec->pos, _("Renaming variable with invalid name " - "`%s' to `%s'."), name, var_get_name (var)); - } - else - { - var = dict_create_var (dict, name, rec->width); - if (var == NULL) - { - var = add_var_with_generated_name (dict, rec->width); - sys_warn (r, rec->pos, _("Renaming variable with duplicate name " - "`%s' to `%s'."), - name, var_get_name (var)); - } - } + struct variable *var = dict_create_var_with_unique_name (dict, name, + rec->width); rec->var = var; + if (strcmp (var_get_name (var), name)) + sys_warn (r, rec->pos, _("Renaming variable with invalid or duplicate " + "name `%s' to `%s'."), + name, var_get_name (var)); /* Set the short name the same as the long name (even if we renamed it). */ @@ -1500,14 +1477,14 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, ofs += 16; } - for (i = 0; i < n_discrete; i++) + for (size_t i = 0; i < n_discrete; i++) { mv_add_num (&mv, parse_float (r, rec->missing, ofs)); ofs += 8; } } else - for (i = 0; i < rec->missing_value_code; i++) + for (size_t i = 0; i < rec->missing_value_code; i++) mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8)); var_set_missing_values (var, &mv); } @@ -1520,8 +1497,8 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, /* Account for values. Skip long string continuation records, if any. */ - n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8); - for (i = 1; i < n_values; i++) + size_t n_values = rec->width == 0 ? 1 : DIV_RND_UP (rec->width, 8); + for (size_t i = 1; i < n_values; i++) if (i + (rec - var_recs) >= n_var_recs || rec[i].width != -1) { sys_error (r, rec->pos, _("Missing string continuation record.")); @@ -2056,8 +2033,7 @@ parse_long_var_name_map (struct sfm_reader *r, while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { /* Validate long name. */ - if (!dict_id_is_valid (dict, long_name) - || long_name[0] == '$' || long_name[0] == '#') + if (!dict_id_is_valid (dict, long_name, DC_ORDINARY)) { sys_warn (r, record->pos, _("Long variable mapping from %s to invalid " diff --git a/src/language/commands/attributes.c b/src/language/commands/attributes.c index e646af3847..deaadc1847 100644 --- a/src/language/commands/attributes.c +++ b/src/language/commands/attributes.c @@ -88,7 +88,8 @@ parse_attribute_name (struct lexer *lexer, const char *dict_encoding, { if (!lex_force_id (lexer)) return NULL; - char *error = id_is_valid__ (lex_tokcstr (lexer), dict_encoding); + char *error = id_is_valid__ (lex_tokcstr (lexer), dict_encoding, + DC_ORDINARY | DC_SYSTEM | DC_SCRATCH); if (error) { lex_error (lexer, "%s", error); diff --git a/src/language/commands/get-data.c b/src/language/commands/get-data.c index 377e40c555..93c933ad6a 100644 --- a/src/language/commands/get-data.c +++ b/src/language/commands/get-data.c @@ -511,7 +511,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) if (!lex_force_id (lexer)) goto error; name = xstrdup (lex_tokcstr (lexer)); - char *error = dict_id_is_valid__ (dict, name); + char *error = dict_id_is_valid__ (dict, name, DC_ORDINARY); if (error) { lex_error (lexer, "%s", error); diff --git a/src/language/commands/matrix.c b/src/language/commands/matrix.c index 4dd70dcc84..bd6916a951 100644 --- a/src/language/commands/matrix.c +++ b/src/language/commands/matrix.c @@ -6170,7 +6170,7 @@ save_file_open (struct save_file *sf, gsl_matrix *m, for (size_t i = 0; i < nv.size; i++) { char *name = trimmed_string (gsl_vector_get (&nv, i)); - char *error = dict_id_is_valid__ (dict, name); + char *error = dict_id_is_valid__ (dict, name, DC_ORDINARY); if (!error) string_array_append_nocopy (&names, name); else diff --git a/src/language/commands/vector.c b/src/language/commands/vector.c index b9407cbb6a..cece0afb38 100644 --- a/src/language/commands/vector.c +++ b/src/language/commands/vector.c @@ -58,7 +58,8 @@ cmd_vector (struct lexer *lexer, struct dataset *ds) size_t allocated_vectors = 0; while (lex_token (lexer) == T_ID) { - char *error = dict_id_is_valid__ (dict, lex_tokcstr (lexer)); + char *error = dict_id_is_valid__ (dict, lex_tokcstr (lexer), + DC_ORDINARY | DC_SCRATCH); if (error) { lex_error (lexer, "%s", error); @@ -176,7 +177,8 @@ cmd_vector (struct lexer *lexer, struct dataset *ds) for (size_t j = 0; j < n_vars; j++) { char *name = xasprintf ("%s%zu", vectors[i], j + 1); - char *error = dict_id_is_valid__ (dict, name); + char *error = dict_id_is_valid__ (dict, name, + DC_ORDINARY | DC_SCRATCH); if (error) { lex_ofs_error (lexer, vectors_start, end_ofs, "%s", error); diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c index 57f7042eca..02bfcc9dfd 100644 --- a/src/language/lexer/variable-parser.c +++ b/src/language/lexer/variable-parser.c @@ -429,14 +429,15 @@ fail: } char * -parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d) +parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d, + enum dict_class classes) { if (!is_dict_name_token (lexer, d)) { lex_error (lexer, ("Syntax error expecting variable name.")); return NULL; } - char *error = dict_id_is_valid__ (d, lex_tokcstr (lexer)); + char *error = dict_id_is_valid__ (d, lex_tokcstr (lexer), classes); if (error) { lex_error (lexer, "%s", error); @@ -547,18 +548,15 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict, names = NULL; } + enum dict_class classes = (pv_opts & PV_NO_SCRATCH + ? DC_ORDINARY + : DC_ORDINARY | DC_SCRATCH); do { int start_ofs = lex_ofs (lexer); - name1 = parse_DATA_LIST_var (lexer, dict); + name1 = parse_DATA_LIST_var (lexer, dict, classes); if (!name1) goto exit; - if (dict_class_from_id (name1) == DC_SCRATCH && pv_opts & PV_NO_SCRATCH) - { - lex_ofs_error (lexer, start_ofs, start_ofs, - _("Scratch variables not allowed here.")); - goto exit; - } if (lex_match (lexer, T_TO)) { unsigned long int num1, num2; @@ -566,7 +564,7 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict, int root_len1, root_len2; unsigned long int number; - name2 = parse_DATA_LIST_var (lexer, dict); + name2 = parse_DATA_LIST_var (lexer, dict, classes); if (!name2) goto exit; int end_ofs = lex_ofs (lexer) - 1; diff --git a/src/language/lexer/variable-parser.h b/src/language/lexer/variable-parser.h index 8fd4824671..abc92825aa 100644 --- a/src/language/lexer/variable-parser.h +++ b/src/language/lexer/variable-parser.h @@ -19,6 +19,7 @@ #include #include +#include "data/dict-class.h" struct pool; struct dictionary; @@ -60,7 +61,8 @@ bool parse_variables_pool (struct lexer *, struct pool *, const struct dictionar bool parse_var_set_vars (struct lexer *, const struct var_set *, struct variable ***, size_t *, int opts); -char *parse_DATA_LIST_var (struct lexer *, const struct dictionary *); +char *parse_DATA_LIST_var (struct lexer *, const struct dictionary *, + enum dict_class); bool parse_DATA_LIST_vars (struct lexer *, const struct dictionary *, char ***names, size_t *n, int opts); bool parse_DATA_LIST_vars_pool (struct lexer *, const struct dictionary *, diff --git a/src/ui/gui/psppire-dict.c b/src/ui/gui/psppire-dict.c index 32a5cc13e2..dbad71aa16 100644 --- a/src/ui/gui/psppire-dict.c +++ b/src/ui/gui/psppire-dict.c @@ -471,7 +471,7 @@ psppire_dict_set_name (PsppireDict* d, gint idx, const gchar *name) g_assert (d); g_assert (PSPPIRE_IS_DICT (d)); - if (! dict_id_is_valid (d->dict, name)) + if (! dict_id_is_valid (d->dict, name, DC_ORDINARY)) return FALSE; if (idx < dict_get_n_vars (d->dict)) @@ -561,7 +561,7 @@ gboolean psppire_dict_check_name (const PsppireDict *dict, const gchar *name) { - return (dict_id_is_valid (dict->dict, name) + return (dict_id_is_valid (dict->dict, name, DC_ORDINARY) && !psppire_dict_lookup_var (dict, name)); } @@ -879,7 +879,7 @@ gboolean psppire_dict_rename_var (PsppireDict *dict, struct variable *v, const gchar *name) { - if (! dict_id_is_valid (dict->dict, name)) + if (! dict_id_is_valid (dict->dict, name, DC_ORDINARY)) return FALSE; /* Make sure no other variable has this name */ diff --git a/src/ui/gui/psppire-import-textfile.c b/src/ui/gui/psppire-import-textfile.c index 4b5c0963c8..167c04ccea 100644 --- a/src/ui/gui/psppire-import-textfile.c +++ b/src/ui/gui/psppire-import-textfile.c @@ -427,7 +427,6 @@ static void choose_column_names (PsppireImportAssistant *ia) { int i; - unsigned long int generated_name_count = 0; char *encoding = NULL; g_object_get (ia->text_file, "encoding", &encoding, NULL); if (ia->dict) @@ -446,12 +445,7 @@ choose_column_names (PsppireImportAssistant *ia) candidate_name = psppire_delimited_text_get_header_title (PSPPIRE_DELIMITED_TEXT (ia->delimiters_model), i); } - char *name = dict_make_unique_var_name (ia->dict, - candidate_name, - &generated_name_count); - - dict_create_var_assert (ia->dict, name, 0); - free (name); + dict_create_var_with_unique_name (ia->dict, candidate_name, 0); } } diff --git a/tests/data/dictionary.at b/tests/data/dictionary.at index e7e589e881..aec431b4d4 100644 --- a/tests/data/dictionary.at +++ b/tests/data/dictionary.at @@ -18,6 +18,7 @@ AT_BANNER([dictionary]) AT_SETUP([dictionary case-insensitivity]) AT_DATA([dictionary.sps], [dnl +SET LOCALE='UTF-8'. DATA LIST LIST /aèiöu aeiou. BEGIN DATA 1 2 @@ -42,8 +43,8 @@ Table: Data List AÈIÖU,aeiou 1.00,2.00 -"dictionary.sps:8.17-8.29: error: RENAME VARIABLES: Renaming would duplicate variable name aèiöu. - 8 | RENAME VARIABLE (aeiou=aèiöu). +"dictionary.sps:9.17-9.29: error: RENAME VARIABLES: Renaming would duplicate variable name aèiöu. + 9 | RENAME VARIABLE (aeiou=aèiöu). | ^~~~~~~~~~~~~" ]) diff --git a/tests/data/pc+-file-reader.at b/tests/data/pc+-file-reader.at index 115b2f953f..0fc02c169d 100644 --- a/tests/data/pc+-file-reader.at +++ b/tests/data/pc+-file-reader.at @@ -1049,11 +1049,11 @@ AT_DATA([pc+-file.sps], [dnl GET FILE='pc+-file.sav' ENCODING='us-ascii'. ]) AT_CHECK([pspp -O format=csv pc+-file.sps], [0], [dnl -warning: `pc+-file.sav' near offset 0x230: Renaming variable with duplicate name `NUM1' to `VAR001'. +warning: `pc+-file.sav' near offset 0x230: Renaming variable with duplicate name `NUM1' to `NUM1_A'. -warning: `pc+-file.sav' near offset 0x250: Renaming variable with duplicate name `NUM1' to `VAR002'. +warning: `pc+-file.sav' near offset 0x250: Renaming variable with duplicate name `NUM1' to `NUM1_B'. -warning: `pc+-file.sav' near offset 0x270: Renaming variable with duplicate name `NUM1' to `VAR003'. +warning: `pc+-file.sav' near offset 0x270: Renaming variable with duplicate name `NUM1' to `NUM1_C'. ]) AT_CLEANUP diff --git a/tests/data/sys-file-reader.at b/tests/data/sys-file-reader.at index 4057109e1b..7771c27155 100644 --- a/tests/data/sys-file-reader.at +++ b/tests/data/sys-file-reader.at @@ -1748,7 +1748,7 @@ for variant in be le; do AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'. ]) AT_CHECK([pspp -O format=csv sys-file.sps], 0, - [warning: `sys-file.sav' near offset 0xb4: Renaming variable with invalid name `$UM1' to `VAR001'. + [warning: `sys-file.sav' near offset 0xb4: Renaming variable with invalid or duplicate name `$UM1' to `UM1'. ]) done AT_CLEANUP @@ -1774,7 +1774,7 @@ for variant in be le; do AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'. ]) AT_CHECK([pspp -O format=csv sys-file.sps], 0, - [warning: `sys-file.sav' near offset 0xb4: Renaming variable with invalid name `TO' to `VAR001'. + [warning: `sys-file.sav' near offset 0xb4: Renaming variable with invalid or duplicate name `TO' to `TO_A'. ]) done AT_CLEANUP @@ -1830,12 +1830,12 @@ for variant in be le; do DISPLAY DICTIONARY. ]) AT_CHECK([pspp -O format=csv sys-file.sps], [0], - [warning: `sys-file.sav' near offset 0xd4: Renaming variable with duplicate name `VAR1' to `VAR001'. + [warning: `sys-file.sav' near offset 0xd4: Renaming variable with invalid or duplicate name `VAR1' to `VAR1_A'. Table: Variables Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format var1,1,Unknown,Input,8,Right,F8.0,F8.0 -var001,2,Unknown,Input,8,Right,F8.0,F8.0 +var1_a,2,Unknown,Input,8,Right,F8.0,F8.0 ]) done AT_CLEANUP diff --git a/tests/language/commands/mrsets.at b/tests/language/commands/mrsets.at index c805b64455..58ddd1e771 100644 --- a/tests/language/commands/mrsets.at +++ b/tests/language/commands/mrsets.at @@ -278,7 +278,7 @@ AT_CHECK([pspp -O format=csv mrsets.sps], [1], [dnl 20 | MRSETS /MDGROUP NAME=**. | ^~" -"mrsets.sps:21.22: error: MRSETS: x is not a valid name for a multiple response set. Multiple response set names must begin with `$'. +"mrsets.sps:21.22: error: MRSETS: `x' is not valid here because this identifier must start with `$'. 21 | MRSETS /MDGROUP NAME=x. | ^" -- 2.30.2