X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fvariable-parser.c;h=3b6b96ad0e2e2135d49de0ea82dcda9577be0300;hb=0fde6afee3c995bf264c24c438f43eeb58b859b5;hp=54a4e85c0e4158686f5514d0c78aeafa6b412c0a;hpb=42489b63e0b4bec2e20c2f55c9791234f7b41764;p=pspp diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c index 54a4e85c0e..3b6b96ad0e 100644 --- a/src/language/lexer/variable-parser.c +++ b/src/language/lexer/variable-parser.c @@ -1,66 +1,99 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2012, 2020 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include -#include +#include "language/lexer/variable-parser.h" #include +#include #include #include -#include "lexer.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/variable.h" +#include "language/lexer/lexer.h" +#include "libpspp/assertion.h" +#include "libpspp/cast.h" +#include "libpspp/hash-functions.h" +#include "libpspp/i18n.h" +#include "libpspp/hmapx.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" +#include "libpspp/stringi-set.h" + +#include "math/interaction.h" + +#include "gl/c-ctype.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) +static struct variable *var_set_get_var (const struct var_set *, size_t); +static struct variable *var_set_lookup_var (const struct var_set *, + const char *); +static bool var_set_lookup_var_idx (const struct var_set *, const char *, + size_t *); +static bool var_set_get_names_must_be_ids (const struct var_set *); + +static bool +is_name_token (const struct lexer *lexer, bool names_must_be_ids) +{ + return (lex_token (lexer) == T_ID + || (!names_must_be_ids && lex_token (lexer) == T_STRING)); +} + +static bool +is_vs_name_token (const struct lexer *lexer, const struct var_set *vs) +{ + return is_name_token (lexer, var_set_get_names_must_be_ids (vs)); +} + +static bool +is_dict_name_token (const struct lexer *lexer, const struct dictionary *d) +{ + return is_name_token (lexer, dict_get_names_must_be_ids (d)); +} + /* Parses a name as a variable within VS. Sets *IDX to the variable's index and returns true if successful. On failure emits an error message and returns false. */ static bool -parse_vs_variable_idx (const struct var_set *vs, size_t *idx) +parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs, + size_t *idx) { assert (idx != NULL); - - if (token != T_ID) + + if (!is_vs_name_token (lexer, vs)) { - lex_error (_("expecting variable name")); + lex_error (lexer, _("expecting variable name")); return false; } - else if (var_set_lookup_var_idx (vs, tokid, idx)) + else if (var_set_lookup_var_idx (vs, lex_tokcstr (lexer), idx)) { - lex_get (); + lex_get (lexer); return true; } - else + else { - msg (SE, _("%s is not a variable name."), tokid); + msg (SE, _("%s is not a variable name."), lex_tokcstr (lexer)); return false; } } @@ -69,41 +102,33 @@ parse_vs_variable_idx (const struct var_set *vs, size_t *idx) if successful. On failure emits an error message and returns a null pointer. */ static struct variable * -parse_vs_variable (const struct var_set *vs) +parse_vs_variable (struct lexer *lexer, const struct var_set *vs) { size_t idx; - return parse_vs_variable_idx (vs, &idx) ? var_set_get_var (vs, idx) : NULL; + return parse_vs_variable_idx (lexer, vs, &idx) ? var_set_get_var (vs, idx) : NULL; } /* Parses a variable name in dictionary D and returns the variable if successful. On failure emits an error message and returns a null pointer. */ struct variable * -parse_dict_variable (const struct dictionary *d) +parse_variable (struct lexer *lexer, const struct dictionary *d) { struct var_set *vs = var_set_create_from_dict (d); - struct variable *var = parse_vs_variable (vs); + struct variable *var = parse_vs_variable (lexer, vs); var_set_destroy (vs); return var; } -/* Parses a variable name in dataset_dict (current_dataset) and returns the - variable if successful. On failure emits an error message and - returns a null pointer. */ -struct variable * -parse_variable (void) -{ - return parse_dict_variable (dataset_dict (current_dataset)); -} - - /* Parses a set of variables from dictionary D given options OPTS. Resulting list of variables stored in *VAR and the number of variables into *CNT. Returns true only if - successful. */ + successful. The dictionary D must contain at least one + variable. */ bool -parse_variables (const struct dictionary *d, struct variable ***var, - size_t *cnt, int opts) +parse_variables (struct lexer *lexer, const struct dictionary *d, + struct variable ***var, + size_t *cnt, int opts) { struct var_set *vs; int success; @@ -113,9 +138,13 @@ parse_variables (const struct dictionary *d, struct variable ***var, assert (cnt != NULL); vs = var_set_create_from_dict (d); - success = parse_var_set_vars (vs, var, cnt, opts); - if ( success == 0 ) - free ( *var ) ; + if (var_set_get_cnt (vs) == 0) + { + *cnt = 0; + var_set_destroy (vs); + return false; + } + success = parse_var_set_vars (lexer, vs, var, cnt, opts); var_set_destroy (vs); return success; } @@ -126,8 +155,9 @@ parse_variables (const struct dictionary *d, struct variable ***var, successful. Same behavior as parse_variables, except that all allocations are taken from the given POOL. */ bool -parse_variables_pool (struct pool *pool, const struct dictionary *dict, - struct variable ***vars, size_t *var_cnt, int opts) +parse_variables_pool (struct lexer *lexer, struct pool *pool, + const struct dictionary *dict, + struct variable ***vars, size_t *var_cnt, int opts) { int retval; @@ -136,8 +166,8 @@ parse_variables_pool (struct pool *pool, const struct dictionary *dict, already in the pool, which would attempt to re-free it later. */ assert (!(opts & PV_APPEND)); - - retval = parse_variables (dict, vars, var_cnt, opts); + + retval = parse_variables (lexer, dict, vars, var_cnt, opts); if (retval) pool_register (pool, free, *vars); return retval; @@ -148,13 +178,14 @@ parse_variables_pool (struct pool *pool, const struct dictionary *dict, dictionary class, and returns true. Returns false on failure. */ static bool -parse_var_idx_class (const struct var_set *vs, size_t *idx, - enum dict_class *class) +parse_var_idx_class (struct lexer *lexer, const struct var_set *vs, + size_t *idx, + enum dict_class *class) { - if (!parse_vs_variable_idx (vs, idx)) + if (!parse_vs_variable_idx (lexer, vs, idx)) return false; - *class = dict_class_from_id (var_set_get_var (vs, *idx)->name); + *class = dict_class_from_id (var_get_name (var_set_get_var (vs, *idx))); return true; } @@ -169,25 +200,33 @@ add_variable (struct variable ***v, size_t *nv, size_t *mv, const struct var_set *vs, size_t idx) { struct variable *add = var_set_get_var (vs, idx); + const char *add_name = var_get_name (add); - if ((pv_opts & PV_NUMERIC) && add->type != NUMERIC) + if ((pv_opts & PV_NUMERIC) && !var_is_numeric (add)) msg (SW, _("%s is not a numeric variable. It will not be " - "included in the variable list."), add->name); - else if ((pv_opts & PV_STRING) && add->type != ALPHA) + "included in the variable list."), add_name); + else if ((pv_opts & PV_STRING) && !var_is_alpha (add)) msg (SE, _("%s is not a string variable. It will not be " - "included in the variable list."), add->name); + "included in the variable list."), add_name); else if ((pv_opts & PV_NO_SCRATCH) - && dict_class_from_id (add->name) == DC_SCRATCH) + && dict_class_from_id (add_name) == DC_SCRATCH) msg (SE, _("Scratch variables (such as %s) are not allowed " - "here."), add->name); - else if ((pv_opts & PV_SAME_TYPE) && *nv && add->type != (*v)[0]->type) + "here."), add_name); + else if ((pv_opts & (PV_SAME_TYPE | PV_SAME_WIDTH)) && *nv + && var_get_type (add) != var_get_type ((*v)[0])) msg (SE, _("%s and %s are not the same type. All variables in " "this variable list must be of the same type. %s " - "will be omitted from list."), - (*v)[0]->name, add->name, add->name); - else if ((pv_opts & PV_NO_DUPLICATE) && included[idx]) - msg (SE, _("Variable %s appears twice in variable list."), add->name); - else if ((pv_opts & PV_DUPLICATE) || !included[idx]) + "will be omitted from the list."), + var_get_name ((*v)[0]), add_name, add_name); + else if ((pv_opts & PV_SAME_WIDTH) && *nv + && var_get_width (add) != var_get_width ((*v)[0])) + msg (SE, _("%s and %s are string variables with different widths. " + "All variables in this variable list must have the " + "same width. %s will be omitted from the list."), + var_get_name ((*v)[0]), add_name, add_name); + else if ((pv_opts & PV_NO_DUPLICATE) && included && included[idx]) + msg (SE, _("Variable %s appears twice in variable list."), add_name); + else if ((pv_opts & PV_DUPLICATE) || !included || !included[idx]) { if (*nv >= *mv) { @@ -209,12 +248,12 @@ static void add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included, int pv_opts, const struct var_set *vs, int first_idx, int last_idx, - enum dict_class class) + enum dict_class class) { size_t i; - + for (i = first_idx; i <= last_idx; i++) - if (dict_class_from_id (var_set_get_var (vs, i)->name) == class) + if (dict_class_from_id (var_get_name (var_set_get_var (vs, i))) == class) add_variable (v, nv, mv, included, pv_opts, vs, i); } @@ -222,7 +261,7 @@ add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included, Conversely, if parse_variables() returns true, then *nv is nonzero and *v is non-NULL. */ bool -parse_var_set_vars (const struct var_set *vs, +parse_var_set_vars (struct lexer *lexer, const struct var_set *vs, struct variable ***v, size_t *nv, int pv_opts) { @@ -233,11 +272,12 @@ parse_var_set_vars (const struct var_set *vs, assert (v != NULL); assert (nv != NULL); - /* At most one of PV_NUMERIC, PV_STRING, PV_SAME_TYPE may be - specified. */ - assert ((((pv_opts & PV_NUMERIC) != 0) - + ((pv_opts & PV_STRING) != 0) - + ((pv_opts & PV_SAME_TYPE) != 0)) <= 1); + /* At most one of PV_NUMERIC, PV_STRING, PV_SAME_TYPE, + PV_SAME_WIDTH may be specified. */ + assert (((pv_opts & PV_NUMERIC) != 0) + + ((pv_opts & PV_STRING) != 0) + + ((pv_opts & PV_SAME_TYPE) != 0) + + ((pv_opts & PV_SAME_WIDTH) != 0) <= 1); /* PV_DUPLICATE and PV_NO_DUPLICATE are incompatible. */ assert (!(pv_opts & PV_DUPLICATE) || !(pv_opts & PV_NO_DUPLICATE)); @@ -254,36 +294,41 @@ parse_var_set_vars (const struct var_set *vs, if (!(pv_opts & PV_DUPLICATE)) { size_t i; - + included = xcalloc (var_set_get_cnt (vs), sizeof *included); for (i = 0; i < *nv; i++) - included[(*v)[i]->index] = 1; + { + size_t index; + if (!var_set_lookup_var_idx (vs, var_get_name ((*v)[i]), &index)) + NOT_REACHED (); + included[index] = 1; + } } else included = NULL; do { - if (lex_match (T_ALL)) + if (lex_match (lexer, T_ALL)) add_variables (v, nv, &mv, included, pv_opts, vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY); - else + else { enum dict_class class; size_t first_idx; - if (!parse_var_idx_class (vs, &first_idx, &class)) + if (!parse_var_idx_class (lexer, vs, &first_idx, &class)) goto fail; - if (!lex_match (T_TO)) + if (!lex_match (lexer, T_TO)) add_variable (v, nv, &mv, included, pv_opts, vs, first_idx); - else + else { size_t last_idx; enum dict_class last_class; struct variable *first_var, *last_var; - if (!parse_var_idx_class (vs, &last_idx, &last_class)) + if (!parse_var_idx_class (lexer, vs, &last_idx, &last_class)) goto fail; first_var = var_set_get_var (vs, first_idx); @@ -291,10 +336,11 @@ parse_var_set_vars (const struct var_set *vs, if (last_idx < first_idx) { + const char *first_name = var_get_name (first_var); + const char *last_name = var_get_name (last_var); msg (SE, _("%s TO %s is not valid syntax since %s " "precedes %s in the dictionary."), - first_var->name, last_var->name, - first_var->name, last_var->name); + first_name, last_name, first_name, last_name); goto fail; } @@ -305,23 +351,25 @@ parse_var_set_vars (const struct var_set *vs, "the same variable dictionaries, of either " "ordinary, scratch, or system variables. " "%s is a %s variable, whereas %s is %s."), - first_var->name, dict_class_to_name (class), - last_var->name, dict_class_to_name (last_class)); + var_get_name (first_var), dict_class_to_name (class), + var_get_name (last_var), + dict_class_to_name (last_class)); goto fail; } add_variables (v, nv, &mv, included, pv_opts, vs, first_idx, last_idx, class); - } + } } if (pv_opts & PV_SINGLE) break; - lex_match (','); + lex_match (lexer, T_COMMA); } - while (token == T_ALL - || (token == T_ID && var_set_lookup_var (vs, tokid) != NULL)); - + while (lex_token (lexer) == T_ALL + || (is_vs_name_token (lexer, vs) + && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL)); + if (*nv == 0) goto fail; @@ -336,163 +384,211 @@ fail: return 0; } -/* Extracts a numeric suffix from variable name S, copying it - into string R. Sets *D to the length of R and *N to its - value. */ -static int -extract_num (char *s, char *r, int *n, int *d) +char * +parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d) { - char *cp; + if (!is_dict_name_token (lexer, d)) + { + lex_error (lexer, "expecting variable name"); + return NULL; + } + if (!dict_id_is_valid (d, lex_tokcstr (lexer), true)) + return NULL; + + char *name = xstrdup (lex_tokcstr (lexer)); + lex_get (lexer); + return name; +} - /* Find first digit. */ - cp = s + strlen (s) - 1; - while (isdigit ((unsigned char) *cp) && cp > s) - cp--; - cp++; +/* Attempts to break UTF-8 encoded NAME into a root (whose contents are + arbitrary except that it does not end in a digit) followed by an integer + numeric suffix. On success, stores the value of the suffix into *NUMBERP, + the number of digits in the suffix into *N_DIGITSP, and returns the number + of bytes in the root. On failure, returns 0. */ +static int +extract_numeric_suffix (const char *name, + unsigned long int *numberp, int *n_digitsp) +{ + size_t root_len, n_digits; + size_t i; - /* Extract root. */ - strncpy (r, s, cp - s); - r[cp - s] = 0; + /* Count length of root. */ + root_len = 1; /* Valid identifier never starts with digit. */ + for (i = 1; name[i] != '\0'; i++) + if (!c_isdigit (name[i])) + root_len = i + 1; + n_digits = i - root_len; - /* Count initial zeros. */ - *n = *d = 0; - while (*cp == '0') + if (n_digits == 0) { - (*d)++; - cp++; + msg (SE, _("`%s' cannot be used with TO because it does not end in " + "a digit."), name); + return 0; } - /* Extract value. */ - while (isdigit ((unsigned char) *cp)) + *numberp = strtoull (name + root_len, NULL, 10); + if (*numberp == ULONG_MAX) { - (*d)++; - *n = (*n * 10) + (*cp - '0'); - cp++; + msg (SE, _("Numeric suffix on `%s' is larger than supported with TO."), + name); + return 0; } + *n_digitsp = n_digits; + return root_len; +} - /* Sanity check. */ - if (*n == 0 && *d == 0) +static bool +add_var_name (char *name, + char ***names, size_t *n_vars, size_t *allocated_vars, + struct stringi_set *set, int pv_opts) +{ + if (pv_opts & PV_NO_DUPLICATE && !stringi_set_insert (set, name)) { - msg (SE, _("incorrect use of TO convention")); - return 0; + msg (SE, _("Variable %s appears twice in variable list."), + name); + return false; } - return 1; + + if (*n_vars >= *allocated_vars) + *names = x2nrealloc (*names, allocated_vars, sizeof **names); + (*names)[(*n_vars)++] = name; + return true; } /* Parses a list of variable names according to the DATA LIST version of the TO convention. */ bool -parse_DATA_LIST_vars (char ***names, size_t *nnames, int pv_opts) +parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict, + char ***namesp, size_t *n_varsp, int pv_opts) { - int n1, n2; - int d1, d2; - int n; - size_t nvar, mvar; - char name1[LONG_NAME_LEN + 1], name2[LONG_NAME_LEN + 1]; - char root1[LONG_NAME_LEN + 1], root2[LONG_NAME_LEN + 1]; - int success = 0; + char **names; + size_t n_vars; + size_t allocated_vars; + + struct stringi_set set; + + char *name1 = NULL; + char *name2 = NULL; + + bool ok = false; - assert (names != NULL); - assert (nnames != NULL); assert ((pv_opts & ~(PV_APPEND | PV_SINGLE | PV_NO_SCRATCH | PV_NO_DUPLICATE)) == 0); - /* FIXME: PV_NO_DUPLICATE is not implemented. */ + stringi_set_init (&set); if (pv_opts & PV_APPEND) - nvar = mvar = *nnames; + { + n_vars = allocated_vars = *n_varsp; + names = *namesp; + + if (pv_opts & PV_NO_DUPLICATE) + { + size_t i; + + for (i = 0; i < n_vars; i++) + stringi_set_insert (&set, names[i]); + } + } else { - nvar = mvar = 0; - *names = NULL; + n_vars = allocated_vars = 0; + names = NULL; } do { - if (token != T_ID) - { - lex_error ("expecting variable name"); - goto fail; - } - if (dict_class_from_id (tokid) == DC_SCRATCH - && (pv_opts & PV_NO_SCRATCH)) + name1 = parse_DATA_LIST_var (lexer, dict); + if (!name1) + goto exit; + if (dict_class_from_id (name1) == DC_SCRATCH && pv_opts & PV_NO_SCRATCH) { msg (SE, _("Scratch variables not allowed here.")); - goto fail; + goto exit; } - strcpy (name1, tokid); - lex_get (); - if (token == T_TO) + if (lex_match (lexer, T_TO)) { - lex_get (); - if (token != T_ID) - { - lex_error ("expecting variable name"); - goto fail; - } - strcpy (name2, tokid); - lex_get (); + unsigned long int num1, num2; + int n_digits1, n_digits2; + int root_len1, root_len2; + unsigned long int number; - if (!extract_num (name1, root1, &n1, &d1) - || !extract_num (name2, root2, &n2, &d2)) - goto fail; + name2 = parse_DATA_LIST_var (lexer, dict); + if (!name2) + goto exit; + + root_len1 = extract_numeric_suffix (name1, &num1, &n_digits1); + if (root_len1 == 0) + goto exit; - if (strcasecmp (root1, root2)) + root_len2 = extract_numeric_suffix (name2, &num2, &n_digits2); + if (root_len2 == 0) + goto exit; + + if (root_len1 != root_len2 || memcasecmp (name1, name2, root_len1)) { msg (SE, _("Prefixes don't match in use of TO convention.")); - goto fail; + goto exit; } - if (n1 > n2) + if (num1 > num2) { msg (SE, _("Bad bounds in use of TO convention.")); - goto fail; + goto exit; } - if (d2 > d1) - d2 = d1; - if (mvar < nvar + (n2 - n1 + 1)) + for (number = num1; number <= num2; number++) { - mvar += ROUND_UP (n2 - n1 + 1, 16); - *names = xnrealloc (*names, mvar, sizeof **names); + char *name = xasprintf ("%.*s%0*lu", + root_len1, name1, + n_digits1, number); + if (!add_var_name (name, &names, &n_vars, &allocated_vars, + &set, pv_opts)) + { + free (name); + goto exit; + } } - for (n = n1; n <= n2; n++) - { - char name[LONG_NAME_LEN + 1]; - sprintf (name, "%s%0*d", root1, d1, n); - (*names)[nvar] = xstrdup (name); - nvar++; - } + free (name1); + name1 = NULL; + free (name2); + name2 = NULL; } else { - if (nvar >= mvar) - { - mvar += 16; - *names = xnrealloc (*names, mvar, sizeof **names); - } - (*names)[nvar++] = xstrdup (name1); + if (!add_var_name (name1, &names, &n_vars, &allocated_vars, + &set, pv_opts)) + goto exit; + name1 = NULL; } - lex_match (','); + lex_match (lexer, T_COMMA); if (pv_opts & PV_SINGLE) break; } - while (token == T_ID); - success = 1; + while (lex_token (lexer) == T_ID); + ok = true; -fail: - *nnames = nvar; - if (!success) +exit: + stringi_set_destroy (&set); + if (ok) + { + *namesp = names; + *n_varsp = n_vars; + } + else { int i; - for (i = 0; i < nvar; i++) - free ((*names)[i]); - free (*names); - *names = NULL; - *nnames = 0; + for (i = 0; i < n_vars; i++) + free (names[i]); + free (names); + *namesp = NULL; + *n_varsp = 0; + + free (name1); + free (name2); } - return success; + return ok; } /* Registers each of the NAMES[0...NNAMES - 1] in POOL, as well @@ -512,7 +608,8 @@ register_vars_pool (struct pool *pool, char **names, size_t nnames) parse_DATA_LIST_vars(), except that all allocations are taken from the given POOL. */ bool -parse_DATA_LIST_vars_pool (struct pool *pool, +parse_DATA_LIST_vars_pool (struct lexer *lexer, const struct dictionary *dict, + struct pool *pool, char ***names, size_t *nnames, int pv_opts) { int retval; @@ -522,8 +619,8 @@ parse_DATA_LIST_vars_pool (struct pool *pool, presumably already in the pool, which would attempt to re-free it later. */ assert (!(pv_opts & PV_APPEND)); - - retval = parse_DATA_LIST_vars (names, nnames, pv_opts); + + retval = parse_DATA_LIST_vars (lexer, dict, names, nnames, pv_opts); if (retval) register_vars_pool (pool, *names, *nnames); return retval; @@ -533,38 +630,41 @@ parse_DATA_LIST_vars_pool (struct pool *pool, existing and the rest are to be created. Same args as parse_DATA_LIST_vars(). */ bool -parse_mixed_vars (char ***names, size_t *nnames, int pv_opts) +parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict, + char ***names, size_t *nnames, int pv_opts) { size_t i; assert (names != NULL); assert (nnames != NULL); - assert ((pv_opts & ~PV_APPEND) == 0); if (!(pv_opts & PV_APPEND)) { *names = NULL; *nnames = 0; } - while (token == T_ID || token == T_ALL) + while (is_dict_name_token (lexer, dict) || lex_token (lexer) == T_ALL) { - if (token == T_ALL || dict_lookup_var (dataset_dict (current_dataset), tokid) != NULL) + if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL) { struct variable **v; size_t nv; - if (!parse_variables (dataset_dict (current_dataset), &v, &nv, PV_NONE)) + if (!parse_variables (lexer, dict, &v, &nv, pv_opts)) goto fail; *names = xnrealloc (*names, *nnames + nv, sizeof **names); for (i = 0; i < nv; i++) - (*names)[*nnames + i] = xstrdup (v[i]->name); + (*names)[*nnames + i] = xstrdup (var_get_name (v[i])); free (v); *nnames += nv; } - else if (!parse_DATA_LIST_vars (names, nnames, PV_APPEND)) + else if (!parse_DATA_LIST_vars (lexer, dict, names, nnames, PV_APPEND | pv_opts)) goto fail; } - return 1; + if (*nnames == 0) + goto fail; + + return true; fail: for (i = 0; i < *nnames; i++) @@ -572,7 +672,7 @@ fail: free (*names); *names = NULL; *nnames = 0; - return 0; + return false; } /* Parses a list of variables where some of the variables may be @@ -580,7 +680,7 @@ fail: parse_mixed_vars(), except that all allocations are taken from the given POOL. */ bool -parse_mixed_vars_pool (struct pool *pool, +parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struct pool *pool, char ***names, size_t *nnames, int pv_opts) { int retval; @@ -591,15 +691,145 @@ parse_mixed_vars_pool (struct pool *pool, re-free it later. */ assert (!(pv_opts & PV_APPEND)); - retval = parse_mixed_vars (names, nnames, pv_opts); + retval = parse_mixed_vars (lexer, dict, names, nnames, pv_opts); if (retval) register_vars_pool (pool, *names, *nnames); return retval; } +/* Frees the N var_syntax structures in VS, as well as VS itself. */ +void +var_syntax_destroy (struct var_syntax *vs, size_t n) +{ + for (size_t i = 0; i < n; i++) + { + free (vs[i].first); + free (vs[i].last); + } + free (vs); +} + +/* Parses syntax for variables and variable ranges from LEXER. If successful, + initializes *VS to the beginning of an array of var_syntax structs and *N_VS + to the number of elements in the array and returns true. On error, sets *VS + to NULL and *N_VS to 0 and returns false. */ +bool +var_syntax_parse (struct lexer *lexer, struct var_syntax **vs, size_t *n_vs) +{ + *vs = NULL; + *n_vs = 0; + + if (lex_token (lexer) != T_ID) + { + lex_error (lexer, _("expecting variable name")); + goto error; + } + + size_t allocated_vs = 0; + do + { + if (allocated_vs >= *n_vs) + *vs = x2nrealloc (*vs, &allocated_vs, sizeof **vs); + struct var_syntax *new = &(*vs)[(*n_vs)++]; + *new = (struct var_syntax) { .first = ss_xstrdup (lex_tokss (lexer)) }; + lex_get (lexer); + + if (lex_match (lexer, T_TO)) + { + if (lex_token (lexer) != T_ID) + { + lex_error (lexer, _("expecting variable name")); + goto error; + } + + new->last = ss_xstrdup (lex_tokss (lexer)); + lex_get (lexer); + } + } + while (lex_token (lexer) == T_ID); + return true; + +error: + var_syntax_destroy (*vs, *n_vs); + *vs = NULL; + *n_vs = 0; + return false; +} + +/* Looks up the N_VS var syntax structs in VS in DICT, translating them to an + array of variables. If successful, initializes *VARS to the beginning of an + array of pointers to variables and *N_VARS to the length of the array and + returns true. On error, sets *VARS to NULL and *N_VARS to 0. + + For the moment, only honors PV_NUMERIC in OPTS. */ +bool +var_syntax_evaluate (const struct var_syntax *vs, size_t n_vs, + const struct dictionary *dict, + struct variable ***vars, size_t *n_vars, int opts) +{ + assert (!(opts & ~PV_NUMERIC)); + + *vars = NULL; + *n_vars = 0; + + size_t allocated_vars = 0; + for (size_t i = 0; i < n_vs; i++) + { + struct variable *first = dict_lookup_var (dict, vs[i].first); + if (!first) + { + msg (SE, _("%s is not a variable name."), vs[i].first); + goto error; + } + + struct variable *last = (vs[i].last + ? dict_lookup_var (dict, vs[i].last) + : first); + if (!last) + { + msg (SE, _("%s is not a variable name."), vs[i].last); + goto error; + } + + size_t first_idx = var_get_dict_index (first); + size_t last_idx = var_get_dict_index (last); + if (last_idx < first_idx) + { + msg (SE, _("%s TO %s is not valid syntax since %s " + "precedes %s in the dictionary."), + vs[i].first, vs[i].last, + vs[i].first, vs[i].last); + goto error; + } + + for (size_t j = first_idx; j <= last_idx; j++) + { + struct variable *v = dict_get_var (dict, j); + if (opts & PV_NUMERIC && !var_is_numeric (v)) + { + msg (SW, _("%s is not a numeric variable."), var_get_name (v)); + goto error; + } + + if (*n_vars >= allocated_vars) + *vars = x2nrealloc (*vars, &allocated_vars, sizeof **vars); + (*vars)[(*n_vars)++] = v; + } + } + + return true; + +error: + free (*vars); + *vars = NULL; + *n_vars = 0; + return false; +} + /* A set of variables. */ -struct var_set +struct var_set { + bool names_must_be_ids; size_t (*get_cnt) (const struct var_set *); struct variable *(*get_var) (const struct var_set *, size_t idx); bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *); @@ -609,7 +839,7 @@ struct var_set /* Returns the number of variables in VS. */ size_t -var_set_get_cnt (const struct var_set *vs) +var_set_get_cnt (const struct var_set *vs) { assert (vs != NULL); @@ -618,8 +848,8 @@ var_set_get_cnt (const struct var_set *vs) /* Return variable with index IDX in VS. IDX must be less than the number of variables in VS. */ -struct variable * -var_set_get_var (const struct var_set *vs, size_t idx) +static struct variable * +var_set_get_var (const struct var_set *vs, size_t idx) { assert (vs != NULL); assert (idx < var_set_get_cnt (vs)); @@ -630,7 +860,7 @@ var_set_get_var (const struct var_set *vs, size_t idx) /* Returns the variable in VS named NAME, or a null pointer if VS contains no variable with that name. */ struct variable * -var_set_lookup_var (const struct var_set *vs, const char *name) +var_set_lookup_var (const struct var_set *vs, const char *name) { size_t idx; return (var_set_lookup_var_idx (vs, name, &idx) @@ -646,22 +876,27 @@ var_set_lookup_var_idx (const struct var_set *vs, const char *name, { assert (vs != NULL); assert (name != NULL); - assert (strlen (name) <= LONG_NAME_LEN); return vs->lookup_var_idx (vs, name, idx); } /* Destroys VS. */ void -var_set_destroy (struct var_set *vs) +var_set_destroy (struct var_set *vs) { if (vs != NULL) vs->destroy (vs); } + +static bool +var_set_get_names_must_be_ids (const struct var_set *vs) +{ + return vs->names_must_be_ids; +} /* Returns the number of variables in VS. */ static size_t -dict_var_set_get_cnt (const struct var_set *vs) +dict_var_set_get_cnt (const struct var_set *vs) { struct dictionary *d = vs->aux; @@ -671,7 +906,7 @@ dict_var_set_get_cnt (const struct var_set *vs) /* Return variable with index IDX in VS. IDX must be less than the number of variables in VS. */ static struct variable * -dict_var_set_get_var (const struct var_set *vs, size_t idx) +dict_var_set_get_var (const struct var_set *vs, size_t idx) { struct dictionary *d = vs->aux; @@ -682,13 +917,13 @@ dict_var_set_get_var (const struct var_set *vs, size_t idx) and returns true. Otherwise, returns false. */ static bool dict_var_set_lookup_var_idx (const struct var_set *vs, const char *name, - size_t *idx) + size_t *idx) { struct dictionary *d = vs->aux; struct variable *v = dict_lookup_var (d, name); - if (v != NULL) + if (v != NULL) { - *idx = v->index; + *idx = var_get_dict_index (v); return true; } else @@ -697,16 +932,17 @@ dict_var_set_lookup_var_idx (const struct var_set *vs, const char *name, /* Destroys VS. */ static void -dict_var_set_destroy (struct var_set *vs) +dict_var_set_destroy (struct var_set *vs) { free (vs); } /* Returns a variable set based on D. */ struct var_set * -var_set_create_from_dict (const struct dictionary *d) +var_set_create_from_dict (const struct dictionary *d) { struct var_set *vs = xmalloc (sizeof *vs); + vs->names_must_be_ids = dict_get_names_must_be_ids (d); vs->get_cnt = dict_var_set_get_cnt; vs->get_var = dict_var_set_get_var; vs->lookup_var_idx = dict_var_set_lookup_var_idx; @@ -716,16 +952,16 @@ var_set_create_from_dict (const struct dictionary *d) } /* A variable set based on an array. */ -struct array_var_set +struct array_var_set { struct variable *const *var;/* Array of variables. */ size_t var_cnt; /* Number of elements in var. */ - struct hsh_table *name_tab; /* Hash from variable names to variables. */ + struct hmapx vars_by_name; /* Variables hashed by name. */ }; /* Returns the number of variables in VS. */ static size_t -array_var_set_get_cnt (const struct var_set *vs) +array_var_set_get_cnt (const struct var_set *vs) { struct array_var_set *avs = vs->aux; @@ -735,55 +971,55 @@ array_var_set_get_cnt (const struct var_set *vs) /* Return variable with index IDX in VS. IDX must be less than the number of variables in VS. */ static struct variable * -array_var_set_get_var (const struct var_set *vs, size_t idx) +array_var_set_get_var (const struct var_set *vs, size_t idx) { struct array_var_set *avs = vs->aux; - return (struct variable *) avs->var[idx]; + return CONST_CAST (struct variable *, avs->var[idx]); } /* If VS contains a variable named NAME, sets *IDX to its index and returns true. Otherwise, returns false. */ static bool array_var_set_lookup_var_idx (const struct var_set *vs, const char *name, - size_t *idx) + size_t *idx) { struct array_var_set *avs = vs->aux; - struct variable v, *vp, *const *vpp; + struct hmapx_node *node; + struct variable **varp; - strcpy (v.name, name); - vp = &v; - vpp = hsh_find (avs->name_tab, &vp); - if (vpp != NULL) - { - *idx = vpp - avs->var; - return true; - } - else - return false; + HMAPX_FOR_EACH_WITH_HASH (varp, node, utf8_hash_case_string (name, 0), + &avs->vars_by_name) + if (!utf8_strcasecmp (name, var_get_name (*varp))) + { + *idx = varp - avs->var; + return true; + } + + return false; } /* Destroys VS. */ static void -array_var_set_destroy (struct var_set *vs) +array_var_set_destroy (struct var_set *vs) { struct array_var_set *avs = vs->aux; - hsh_destroy (avs->name_tab); + hmapx_destroy (&avs->vars_by_name); free (avs); free (vs); } -/* Returns a variable set based on the VAR_CNT variables in - VAR. */ +/* Returns a variable set based on the VAR_CNT variables in VAR. */ struct var_set * -var_set_create_from_array (struct variable *const *var, size_t var_cnt) +var_set_create_from_array (struct variable *const *var, size_t var_cnt) { struct var_set *vs; struct array_var_set *avs; size_t i; vs = xmalloc (sizeof *vs); + vs->names_must_be_ids = true; vs->get_cnt = array_var_set_get_cnt; vs->get_var = array_var_set_get_var; vs->lookup_var_idx = array_var_set_lookup_var_idx; @@ -791,15 +1027,88 @@ var_set_create_from_array (struct variable *const *var, size_t var_cnt) vs->aux = avs = xmalloc (sizeof *avs); avs->var = var; avs->var_cnt = var_cnt; - avs->name_tab = hsh_create (2 * var_cnt, - compare_var_ptr_names, hash_var_ptr_name, NULL, - NULL); + hmapx_init (&avs->vars_by_name); for (i = 0; i < var_cnt; i++) - if (hsh_insert (avs->name_tab, (void *) &var[i]) != NULL) - { - var_set_destroy (vs); - return NULL; - } - + { + const char *name = var_get_name (var[i]); + size_t idx; + + if (array_var_set_lookup_var_idx (vs, name, &idx)) + { + var_set_destroy (vs); + return NULL; + } + hmapx_insert (&avs->vars_by_name, CONST_CAST (void *, &avs->var[i]), + utf8_hash_case_string (name, 0)); + } + return vs; } + + +/* Match a variable. + If the match succeeds, the variable will be placed in VAR. + Returns true if successful */ +bool +lex_match_variable (struct lexer *lexer, const struct dictionary *dict, const struct variable **var) +{ + if (lex_token (lexer) != T_ID) + return false; + + *var = parse_variable_const (lexer, dict); + + if (*var == NULL) + return false; + return true; +} + +/* An interaction is a variable followed by {*, BY} followed by an interaction */ +static bool +parse_internal_interaction (struct lexer *lexer, const struct dictionary *dict, struct interaction **iact, struct interaction **it) +{ + const struct variable *v = NULL; + assert (iact); + + switch (lex_next_token (lexer, 1)) + { + case T_ENDCMD: + case T_SLASH: + case T_COMMA: + case T_ID: + case T_BY: + case T_ASTERISK: + break; + default: + return false; + break; + } + + if (! lex_match_variable (lexer, dict, &v)) + { + if (it) + interaction_destroy (*it); + *iact = NULL; + return false; + } + + assert (v); + + if (*iact == NULL) + *iact = interaction_create (v); + else + interaction_add_variable (*iact, v); + + if (lex_match (lexer, T_ASTERISK) || lex_match (lexer, T_BY)) + { + return parse_internal_interaction (lexer, dict, iact, iact); + } + + return true; +} + +bool +parse_design_interaction (struct lexer *lexer, const struct dictionary *dict, struct interaction **iact) +{ + return parse_internal_interaction (lexer, dict, iact, NULL); +} +