X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Flexer%2Fvariable-parser.c;h=f444f15e2beb0c28ccc708af69463e64a3ebdd03;hb=0a37f63dddea0f5f41b4a7f0d57eb226284a543f;hp=5c19b81e2f5bd8c477f80742464416b8a58f8fb2;hpb=b6493cbb201307a2a5d1ff840a8347d75548cb85;p=pspp diff --git a/src/language/lexer/variable-parser.c b/src/language/lexer/variable-parser.c index 5c19b81e2f..f444f15e2b 100644 --- a/src/language/lexer/variable-parser.c +++ b/src/language/lexer/variable-parser.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2012, 2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include "libpspp/assertion.h" #include "libpspp/cast.h" #include "libpspp/hash-functions.h" +#include "libpspp/i18n.h" #include "libpspp/hmapx.h" #include "libpspp/message.h" #include "libpspp/misc.h" @@ -45,28 +46,44 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -static struct variable * var_set_get_var (const struct var_set *, size_t ); - +static struct variable *var_set_get_var (const struct var_set *, size_t); static struct variable *var_set_lookup_var (const struct var_set *, const char *); - static bool var_set_lookup_var_idx (const struct var_set *, const char *, size_t *); +static bool var_set_get_names_must_be_ids (const struct var_set *); +static bool +is_name_token (const struct lexer *lexer, bool names_must_be_ids) +{ + return (lex_token (lexer) == T_ID + || (!names_must_be_ids && lex_token (lexer) == T_STRING)); +} +static bool +is_vs_name_token (const struct lexer *lexer, const struct var_set *vs) +{ + return is_name_token (lexer, var_set_get_names_must_be_ids (vs)); +} + +static bool +is_dict_name_token (const struct lexer *lexer, const struct dictionary *d) +{ + return is_name_token (lexer, dict_get_names_must_be_ids (d)); +} /* Parses a name as a variable within VS. Sets *IDX to the variable's index and returns true if successful. On failure emits an error message and returns false. */ static bool parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs, - size_t *idx) + size_t *idx) { assert (idx != NULL); - if (lex_token (lexer) != T_ID) + if (!is_vs_name_token (lexer, vs)) { - lex_error (lexer, _("expecting variable name")); + lex_error (lexer, _("Syntax error expecting variable name.")); return false; } else if (var_set_lookup_var_idx (vs, lex_tokcstr (lexer), idx)) @@ -76,7 +93,7 @@ parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs, } else { - msg (SE, _("%s is not a variable name."), lex_tokcstr (lexer)); + lex_error (lexer, _("%s is not a variable name."), lex_tokcstr (lexer)); return false; } } @@ -105,35 +122,42 @@ parse_variable (struct lexer *lexer, const struct dictionary *d) /* Parses a set of variables from dictionary D given options OPTS. Resulting list of variables stored in *VAR and the - number of variables into *CNT. Returns true only if - successful. */ + number of variables into *N. Returns true only if + successful. The dictionary D must contain at least one + variable. */ bool parse_variables (struct lexer *lexer, const struct dictionary *d, - struct variable ***var, - size_t *cnt, int opts) + struct variable ***var, + size_t *n, int opts) { struct var_set *vs; int success; assert (d != NULL); assert (var != NULL); - assert (cnt != NULL); + assert (n != NULL); vs = var_set_create_from_dict (d); - success = parse_var_set_vars (lexer, vs, var, cnt, opts); + if (var_set_get_n (vs) == 0) + { + *n = 0; + var_set_destroy (vs); + return false; + } + success = parse_var_set_vars (lexer, vs, var, n, opts); var_set_destroy (vs); return success; } /* Parses a set of variables from dictionary D given options OPTS. Resulting list of variables stored in *VARS and the - number of variables into *VAR_CNT. Returns true only if + number of variables into *N_VARS. Returns true only if successful. Same behavior as parse_variables, except that all allocations are taken from the given POOL. */ bool parse_variables_pool (struct lexer *lexer, struct pool *pool, const struct dictionary *dict, - struct variable ***vars, size_t *var_cnt, int opts) + struct variable ***vars, size_t *n_vars, int opts) { int retval; @@ -143,7 +167,7 @@ parse_variables_pool (struct lexer *lexer, struct pool *pool, later. */ assert (!(opts & PV_APPEND)); - retval = parse_variables (lexer, dict, vars, var_cnt, opts); + retval = parse_variables (lexer, dict, vars, n_vars, opts); if (retval) pool_register (pool, free, *vars); return retval; @@ -171,38 +195,46 @@ parse_var_idx_class (struct lexer *lexer, const struct var_set *vs, PV_OPTS, which also affects what variables are allowed in appropriate ways. */ static void -add_variable (struct variable ***v, size_t *nv, size_t *mv, +add_variable (struct lexer *lexer, + struct variable ***v, size_t *nv, size_t *mv, char *included, int pv_opts, - const struct var_set *vs, size_t idx) + const struct var_set *vs, size_t idx, + int start_ofs, int end_ofs) { struct variable *add = var_set_get_var (vs, idx); const char *add_name = var_get_name (add); if ((pv_opts & PV_NUMERIC) && !var_is_numeric (add)) - msg (SW, _("%s is not a numeric variable. It will not be " - "included in the variable list."), add_name); + lex_ofs_msg (lexer, SW, start_ofs, end_ofs, + _("%s is not a numeric variable. It will not be " + "included in the variable list."), add_name); else if ((pv_opts & PV_STRING) && !var_is_alpha (add)) - msg (SE, _("%s is not a string variable. It will not be " - "included in the variable list."), add_name); + lex_ofs_error (lexer, start_ofs, end_ofs, + _("%s is not a string variable. It will not be " + "included in the variable list."), add_name); else if ((pv_opts & PV_NO_SCRATCH) && dict_class_from_id (add_name) == DC_SCRATCH) - msg (SE, _("Scratch variables (such as %s) are not allowed " - "here."), add_name); + lex_ofs_error (lexer, start_ofs, end_ofs, + _("Scratch variables (such as %s) are not allowed " + "here."), add_name); else if ((pv_opts & (PV_SAME_TYPE | PV_SAME_WIDTH)) && *nv && var_get_type (add) != var_get_type ((*v)[0])) - msg (SE, _("%s and %s are not the same type. All variables in " - "this variable list must be of the same type. %s " - "will be omitted from the list."), - var_get_name ((*v)[0]), add_name, add_name); + lex_ofs_error (lexer, start_ofs, end_ofs, + _("%s and %s are not the same type. All variables in " + "this variable list must be of the same type. %s " + "will be omitted from the list."), + var_get_name ((*v)[0]), add_name, add_name); else if ((pv_opts & PV_SAME_WIDTH) && *nv && var_get_width (add) != var_get_width ((*v)[0])) - msg (SE, _("%s and %s are string variables with different widths. " - "All variables in this variable list must have the " - "same width. %s will be omitted from the list."), - var_get_name ((*v)[0]), add_name, add_name); - else if ((pv_opts & PV_NO_DUPLICATE) && included[idx]) - msg (SE, _("Variable %s appears twice in variable list."), add_name); - else if ((pv_opts & PV_DUPLICATE) || !included[idx]) + lex_ofs_error (lexer, start_ofs, end_ofs, + _("%s and %s are string variables with different widths. " + "All variables in this variable list must have the " + "same width. %s will be omitted from the list."), + var_get_name ((*v)[0]), add_name, add_name); + else if ((pv_opts & PV_NO_DUPLICATE) && included && included[idx]) + lex_ofs_error (lexer, start_ofs, end_ofs, + _("Variable %s appears twice in variable list."), add_name); + else if ((pv_opts & PV_DUPLICATE) || !included || !included[idx]) { if (*nv >= *mv) { @@ -221,16 +253,19 @@ add_variable (struct variable ***v, size_t *nv, size_t *mv, duplicates if indicated by PV_OPTS, which also affects what variables are allowed in appropriate ways. */ static void -add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included, +add_variables (struct lexer *lexer, + struct variable ***v, size_t *nv, size_t *mv, char *included, int pv_opts, const struct var_set *vs, int first_idx, int last_idx, - enum dict_class class) + enum dict_class class, + int start_ofs, int end_ofs) { size_t i; for (i = first_idx; i <= last_idx; i++) if (dict_class_from_id (var_get_name (var_set_get_var (vs, i))) == class) - add_variable (v, nv, mv, included, pv_opts, vs, i); + add_variable (lexer, v, nv, mv, included, pv_opts, vs, i, + start_ofs, end_ofs); } /* Note that if parse_variables() returns false, *v is free()'d. @@ -271,7 +306,7 @@ parse_var_set_vars (struct lexer *lexer, const struct var_set *vs, { size_t i; - included = xcalloc (var_set_get_cnt (vs), sizeof *included); + included = xcalloc (var_set_get_n (vs), sizeof *included); for (i = 0; i < *nv; i++) { size_t index; @@ -285,9 +320,11 @@ parse_var_set_vars (struct lexer *lexer, const struct var_set *vs, do { + int start_ofs = lex_ofs (lexer); if (lex_match (lexer, T_ALL)) - add_variables (v, nv, &mv, included, pv_opts, - vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY); + add_variables (lexer, v, nv, &mv, included, pv_opts, + vs, 0, var_set_get_n (vs) - 1, DC_ORDINARY, + start_ofs, start_ofs); else { enum dict_class class; @@ -297,7 +334,8 @@ parse_var_set_vars (struct lexer *lexer, const struct var_set *vs, goto fail; if (!lex_match (lexer, T_TO)) - add_variable (v, nv, &mv, included, pv_opts, vs, first_idx); + add_variable (lexer, v, nv, &mv, included, pv_opts, vs, first_idx, + start_ofs, start_ofs); else { size_t last_idx; @@ -307,6 +345,8 @@ parse_var_set_vars (struct lexer *lexer, const struct var_set *vs, if (!parse_var_idx_class (lexer, vs, &last_idx, &last_class)) goto fail; + int end_ofs = lex_ofs (lexer) - 1; + first_var = var_set_get_var (vs, first_idx); last_var = var_set_get_var (vs, last_idx); @@ -314,27 +354,56 @@ parse_var_set_vars (struct lexer *lexer, const struct var_set *vs, { const char *first_name = var_get_name (first_var); const char *last_name = var_get_name (last_var); - msg (SE, _("%s TO %s is not valid syntax since %s " - "precedes %s in the dictionary."), - first_name, last_name, first_name, last_name); + lex_ofs_error (lexer, start_ofs, end_ofs, + _("%s TO %s is not valid syntax since %s " + "precedes %s in the dictionary."), + first_name, last_name, first_name, last_name); goto fail; } if (class != last_class) { - msg (SE, _("When using the TO keyword to specify several " - "variables, both variables must be from " - "the same variable dictionaries, of either " - "ordinary, scratch, or system variables. " - "%s is a %s variable, whereas %s is %s."), - var_get_name (first_var), dict_class_to_name (class), - var_get_name (last_var), - dict_class_to_name (last_class)); + lex_ofs_error (lexer, start_ofs, end_ofs, + _("With the syntax TO , variables " + "and must be both regular variables " + "or both scratch variables.")); + struct pair + { + const char *name; + enum dict_class class; + int ofs; + } + pairs[2] = { + { var_get_name (first_var), class, start_ofs }, + { var_get_name (last_var), last_class, end_ofs }, + }; + for (size_t i = 0; i < 2; i++) + switch (pairs[i].class) + { + case DC_ORDINARY: + lex_ofs_msg (lexer, SN, pairs[i].ofs, pairs[i].ofs, + _("%s is a regular variable."), + pairs[i].name); + break; + + case DC_SCRATCH: + lex_ofs_msg (lexer, SN, pairs[i].ofs, pairs[i].ofs, + _("%s is a scratch variable."), + pairs[i].name); + break; + + case DC_SYSTEM: + lex_ofs_msg (lexer, SN, pairs[i].ofs, pairs[i].ofs, + _("%s is a system variable."), + pairs[i].name); + break; + } goto fail; } - add_variables (v, nv, &mv, included, pv_opts, - vs, first_idx, last_idx, class); + add_variables (lexer, v, nv, &mv, included, pv_opts, + vs, first_idx, last_idx, class, + start_ofs, lex_ofs (lexer) - 1); } } @@ -343,7 +412,8 @@ parse_var_set_vars (struct lexer *lexer, const struct var_set *vs, lex_match (lexer, T_COMMA); } while (lex_token (lexer) == T_ALL - || (lex_token (lexer) == T_ID && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL)); + || (is_vs_name_token (lexer, vs) + && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL)); if (*nv == 0) goto fail; @@ -359,13 +429,34 @@ fail: return 0; } +char * +parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d) +{ + if (!is_dict_name_token (lexer, d)) + { + lex_error (lexer, ("Syntax error expecting variable name.")); + return NULL; + } + char *error = dict_id_is_valid__ (d, lex_tokcstr (lexer)); + if (error) + { + lex_error (lexer, "%s", error); + free (error); + return NULL; + } + + char *name = xstrdup (lex_tokcstr (lexer)); + lex_get (lexer); + return name; +} + /* Attempts to break UTF-8 encoded NAME into a root (whose contents are arbitrary except that it does not end in a digit) followed by an integer numeric suffix. On success, stores the value of the suffix into *NUMBERP, the number of digits in the suffix into *N_DIGITSP, and returns the number of bytes in the root. On failure, returns 0. */ static int -extract_numeric_suffix (const char *name, +extract_numeric_suffix (struct lexer *lexer, int ofs, const char *name, unsigned long int *numberp, int *n_digitsp) { size_t root_len, n_digits; @@ -380,16 +471,18 @@ extract_numeric_suffix (const char *name, if (n_digits == 0) { - msg (SE, _("`%s' cannot be used with TO because it does not end in " - "a digit."), name); + lex_ofs_error (lexer, ofs, ofs, + _("`%s' cannot be used with TO because it does not end in " + "a digit."), name); return 0; } *numberp = strtoull (name + root_len, NULL, 10); if (*numberp == ULONG_MAX) { - msg (SE, _("Numeric suffix on `%s' is larger than supported with TO."), - name); + lex_ofs_error (lexer, ofs, ofs, + _("Numeric suffix on `%s' is larger than supported with TO."), + name); return 0; } *n_digitsp = n_digits; @@ -397,14 +490,15 @@ extract_numeric_suffix (const char *name, } static bool -add_var_name (char *name, +add_var_name (struct lexer *lexer, int start_ofs, int end_ofs, char *name, char ***names, size_t *n_vars, size_t *allocated_vars, struct stringi_set *set, int pv_opts) { if (pv_opts & PV_NO_DUPLICATE && !stringi_set_insert (set, name)) { - msg (SE, _("Variable %s appears twice in variable list."), - name); + lex_ofs_error (lexer, start_ofs, end_ofs, + _("Variable %s appears twice in variable list."), + name); return false; } @@ -428,6 +522,7 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict, char *name1 = NULL; char *name2 = NULL; + bool ok = false; assert ((pv_opts & ~(PV_APPEND | PV_SINGLE @@ -455,53 +550,48 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict, do { - if (lex_token (lexer) != T_ID - || !dict_id_is_valid (dict, lex_tokcstr (lexer), true)) + int start_ofs = lex_ofs (lexer); + name1 = parse_DATA_LIST_var (lexer, dict); + if (!name1) + goto exit; + if (dict_class_from_id (name1) == DC_SCRATCH && pv_opts & PV_NO_SCRATCH) { - lex_error (lexer, "expecting variable name"); + lex_ofs_error (lexer, start_ofs, start_ofs, + _("Scratch variables not allowed here.")); goto exit; } - if (dict_class_from_id (lex_tokcstr (lexer)) == DC_SCRATCH - && (pv_opts & PV_NO_SCRATCH)) + if (lex_match (lexer, T_TO)) { - msg (SE, _("Scratch variables not allowed here.")); - goto exit; - } - name1 = xstrdup (lex_tokcstr (lexer)); - lex_get (lexer); - if (lex_token (lexer) == T_TO) - { - unsigned long int num1, num2; + unsigned long int num1, num2; int n_digits1, n_digits2; int root_len1, root_len2; unsigned long int number; - lex_get (lexer); - if (lex_token (lexer) != T_ID - || !dict_id_is_valid (dict, lex_tokcstr (lexer), true)) - { - lex_error (lexer, "expecting variable name"); - goto exit; - } - name2 = xstrdup (lex_tokcstr (lexer)); - lex_get (lexer); + name2 = parse_DATA_LIST_var (lexer, dict); + if (!name2) + goto exit; + int end_ofs = lex_ofs (lexer) - 1; - root_len1 = extract_numeric_suffix (name1, &num1, &n_digits1); + root_len1 = extract_numeric_suffix (lexer, start_ofs, + name1, &num1, &n_digits1); if (root_len1 == 0) goto exit; - root_len2 = extract_numeric_suffix (name2, &num2, &n_digits2); + root_len2 = extract_numeric_suffix (lexer, end_ofs, + name2, &num2, &n_digits2); if (root_len2 == 0) goto exit; if (root_len1 != root_len2 || memcasecmp (name1, name2, root_len1)) { - msg (SE, _("Prefixes don't match in use of TO convention.")); + lex_ofs_error (lexer, start_ofs, end_ofs, + _("Prefixes don't match in use of TO convention.")); goto exit; } if (num1 > num2) { - msg (SE, _("Bad bounds in use of TO convention.")); + lex_ofs_error (lexer, start_ofs, end_ofs, + _("Bad bounds in use of TO convention.")); goto exit; } @@ -510,7 +600,8 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict, char *name = xasprintf ("%.*s%0*lu", root_len1, name1, n_digits1, number); - if (!add_var_name (name, &names, &n_vars, &allocated_vars, + if (!add_var_name (lexer, start_ofs, end_ofs, + name, &names, &n_vars, &allocated_vars, &set, pv_opts)) { free (name); @@ -525,7 +616,8 @@ parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict, } else { - if (!add_var_name (name1, &names, &n_vars, &allocated_vars, + if (!add_var_name (lexer, start_ofs, start_ofs, + name1, &names, &n_vars, &allocated_vars, &set, pv_opts)) goto exit; name1 = NULL; @@ -607,21 +699,20 @@ parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict, assert (names != NULL); assert (nnames != NULL); - assert ((pv_opts & ~PV_APPEND) == 0); if (!(pv_opts & PV_APPEND)) { *names = NULL; *nnames = 0; } - while (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL) + while (is_dict_name_token (lexer, dict) || lex_token (lexer) == T_ALL) { if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL) { struct variable **v; size_t nv; - if (!parse_variables (lexer, dict, &v, &nv, PV_NONE)) + if (!parse_variables (lexer, dict, &v, &nv, pv_opts)) goto fail; *names = xnrealloc (*names, *nnames + nv, sizeof **names); for (i = 0; i < nv; i++) @@ -629,10 +720,13 @@ parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict, free (v); *nnames += nv; } - else if (!parse_DATA_LIST_vars (lexer, dict, names, nnames, PV_APPEND)) + else if (!parse_DATA_LIST_vars (lexer, dict, names, nnames, PV_APPEND | pv_opts)) goto fail; } - return 1; + if (*nnames == 0) + goto fail; + + return true; fail: for (i = 0; i < *nnames; i++) @@ -640,7 +734,7 @@ fail: free (*names); *names = NULL; *nnames = 0; - return 0; + return false; } /* Parses a list of variables where some of the variables may be @@ -665,10 +759,154 @@ parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struc return retval; } +/* Frees the N var_syntax structures in VS, as well as VS itself. */ +void +var_syntax_destroy (struct var_syntax *vs, size_t n) +{ + for (size_t i = 0; i < n; i++) + { + free (vs[i].first); + free (vs[i].last); + } + free (vs); +} + +/* Parses syntax for variables and variable ranges from LEXER. If successful, + initializes *VS to the beginning of an array of var_syntax structs and *N_VS + to the number of elements in the array and returns true. On error, sets *VS + to NULL and *N_VS to 0 and returns false. */ +bool +var_syntax_parse (struct lexer *lexer, struct var_syntax **vs, size_t *n_vs) +{ + *vs = NULL; + *n_vs = 0; + + if (lex_token (lexer) != T_ID) + { + lex_error (lexer, _("Syntax error expecting variable name.")); + goto error; + } + + size_t allocated_vs = 0; + do + { + if (allocated_vs >= *n_vs) + *vs = x2nrealloc (*vs, &allocated_vs, sizeof **vs); + struct var_syntax *new = &(*vs)[(*n_vs)++]; + *new = (struct var_syntax) { + .first = ss_xstrdup (lex_tokss (lexer)), + .first_ofs = lex_ofs (lexer) + }; + lex_get (lexer); + + if (lex_match (lexer, T_TO)) + { + if (lex_token (lexer) != T_ID) + { + lex_error (lexer, _("Syntax error expecting variable name.")); + goto error; + } + + new->last = ss_xstrdup (lex_tokss (lexer)); + lex_get (lexer); + } + new->last_ofs = lex_ofs (lexer) - 1; + } + while (lex_token (lexer) == T_ID); + return true; + +error: + var_syntax_destroy (*vs, *n_vs); + *vs = NULL; + *n_vs = 0; + return false; +} + +/* Looks up the N_VS var syntax structs in VS in DICT, translating them to an + array of variables. If successful, initializes *VARS to the beginning of an + array of pointers to variables and *N_VARS to the length of the array and + returns true. On error, sets *VARS to NULL and *N_VARS to 0. + + The LEXER is just used for error messages. + + For the moment, only honors PV_NUMERIC in OPTS. */ +bool +var_syntax_evaluate (struct lexer *lexer, + const struct var_syntax *vs, size_t n_vs, + const struct dictionary *dict, + struct variable ***vars, size_t *n_vars, int opts) +{ + assert (!(opts & ~PV_NUMERIC)); + + *vars = NULL; + *n_vars = 0; + + size_t allocated_vars = 0; + for (size_t i = 0; i < n_vs; i++) + { + int first_ofs = vs[i].first_ofs; + struct variable *first = dict_lookup_var (dict, vs[i].first); + if (!first) + { + lex_ofs_error (lexer, first_ofs, first_ofs, + _("%s is not a variable name."), vs[i].first); + goto error; + } + + int last_ofs = vs[i].last_ofs; + struct variable *last = (vs[i].last + ? dict_lookup_var (dict, vs[i].last) + : first); + if (!last) + { + lex_ofs_error (lexer, last_ofs, last_ofs, + _("%s is not a variable name."), vs[i].last); + goto error; + } + + size_t first_idx = var_get_dict_index (first); + size_t last_idx = var_get_dict_index (last); + if (last_idx < first_idx) + { + lex_ofs_error (lexer, first_ofs, last_ofs, + _("%s TO %s is not valid syntax since %s " + "precedes %s in the dictionary."), + vs[i].first, vs[i].last, + vs[i].first, vs[i].last); + goto error; + } + + for (size_t j = first_idx; j <= last_idx; j++) + { + struct variable *v = dict_get_var (dict, j); + if (opts & PV_NUMERIC && !var_is_numeric (v)) + { + lex_ofs_error (lexer, first_ofs, last_ofs, + _("%s is not a numeric variable."), + var_get_name (v)); + goto error; + } + + if (*n_vars >= allocated_vars) + *vars = x2nrealloc (*vars, &allocated_vars, sizeof **vars); + (*vars)[(*n_vars)++] = v; + } + } + + return true; + +error: + free (*vars); + *vars = NULL; + *n_vars = 0; + return false; +} + /* A set of variables. */ struct var_set { - size_t (*get_cnt) (const struct var_set *); + bool names_must_be_ids; + size_t (*get_n) (const struct var_set *); struct variable *(*get_var) (const struct var_set *, size_t idx); bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *); void (*destroy) (struct var_set *); @@ -677,11 +915,11 @@ struct var_set /* Returns the number of variables in VS. */ size_t -var_set_get_cnt (const struct var_set *vs) +var_set_get_n (const struct var_set *vs) { assert (vs != NULL); - return vs->get_cnt (vs); + return vs->get_n (vs); } /* Return variable with index IDX in VS. @@ -690,7 +928,7 @@ static struct variable * var_set_get_var (const struct var_set *vs, size_t idx) { assert (vs != NULL); - assert (idx < var_set_get_cnt (vs)); + assert (idx < var_set_get_n (vs)); return vs->get_var (vs, idx); } @@ -725,14 +963,20 @@ var_set_destroy (struct var_set *vs) if (vs != NULL) vs->destroy (vs); } + +static bool +var_set_get_names_must_be_ids (const struct var_set *vs) +{ + return vs->names_must_be_ids; +} /* Returns the number of variables in VS. */ static size_t -dict_var_set_get_cnt (const struct var_set *vs) +dict_var_set_get_n (const struct var_set *vs) { struct dictionary *d = vs->aux; - return dict_get_var_cnt (d); + return dict_get_n_vars (d); } /* Return variable with index IDX in VS. @@ -774,7 +1018,8 @@ struct var_set * var_set_create_from_dict (const struct dictionary *d) { struct var_set *vs = xmalloc (sizeof *vs); - vs->get_cnt = dict_var_set_get_cnt; + vs->names_must_be_ids = dict_get_names_must_be_ids (d); + vs->get_n = dict_var_set_get_n; vs->get_var = dict_var_set_get_var; vs->lookup_var_idx = dict_var_set_lookup_var_idx; vs->destroy = dict_var_set_destroy; @@ -786,17 +1031,17 @@ var_set_create_from_dict (const struct dictionary *d) struct array_var_set { struct variable *const *var;/* Array of variables. */ - size_t var_cnt; /* Number of elements in var. */ + size_t n_vars; /* Number of elements in var. */ struct hmapx vars_by_name; /* Variables hashed by name. */ }; /* Returns the number of variables in VS. */ static size_t -array_var_set_get_cnt (const struct var_set *vs) +array_var_set_get_n (const struct var_set *vs) { struct array_var_set *avs = vs->aux; - return avs->var_cnt; + return avs->n_vars; } /* Return variable with index IDX in VS. @@ -819,9 +1064,9 @@ array_var_set_lookup_var_idx (const struct var_set *vs, const char *name, struct hmapx_node *node; struct variable **varp; - HMAPX_FOR_EACH_WITH_HASH (varp, node, hash_case_string (name, 0), + HMAPX_FOR_EACH_WITH_HASH (varp, node, utf8_hash_case_string (name, 0), &avs->vars_by_name) - if (!strcasecmp (name, var_get_name (*varp))) + if (!utf8_strcasecmp (name, var_get_name (*varp))) { *idx = varp - avs->var; return true; @@ -841,24 +1086,25 @@ array_var_set_destroy (struct var_set *vs) free (vs); } -/* Returns a variable set based on the VAR_CNT variables in VAR. */ +/* Returns a variable set based on the N_VARS variables in VAR. */ struct var_set * -var_set_create_from_array (struct variable *const *var, size_t var_cnt) +var_set_create_from_array (struct variable *const *var, size_t n_vars) { struct var_set *vs; struct array_var_set *avs; size_t i; vs = xmalloc (sizeof *vs); - vs->get_cnt = array_var_set_get_cnt; + vs->names_must_be_ids = true; + vs->get_n = array_var_set_get_n; vs->get_var = array_var_set_get_var; vs->lookup_var_idx = array_var_set_lookup_var_idx; vs->destroy = array_var_set_destroy; vs->aux = avs = xmalloc (sizeof *avs); avs->var = var; - avs->var_cnt = var_cnt; + avs->n_vars = n_vars; hmapx_init (&avs->vars_by_name); - for (i = 0; i < var_cnt; i++) + for (i = 0; i < n_vars; i++) { const char *name = var_get_name (var[i]); size_t idx; @@ -869,7 +1115,7 @@ var_set_create_from_array (struct variable *const *var, size_t var_cnt) return NULL; } hmapx_insert (&avs->vars_by_name, CONST_CAST (void *, &avs->var[i]), - hash_case_string (name, 0)); + utf8_hash_case_string (name, 0)); } return vs; @@ -887,7 +1133,7 @@ lex_match_variable (struct lexer *lexer, const struct dictionary *dict, const st *var = parse_variable_const (lexer, dict); - if ( *var == NULL) + if (*var == NULL) return false; return true; } @@ -920,15 +1166,15 @@ parse_internal_interaction (struct lexer *lexer, const struct dictionary *dict, *iact = NULL; return false; } - + assert (v); - if ( *iact == NULL) + if (*iact == NULL) *iact = interaction_create (v); else interaction_add_variable (*iact, v); - if ( lex_match (lexer, T_ASTERISK) || lex_match (lexer, T_BY)) + if (lex_match (lexer, T_ASTERISK) || lex_match (lexer, T_BY)) { return parse_internal_interaction (lexer, dict, iact, iact); }