1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/variable-parser.h"
26 #include "data/dataset.h"
27 #include "data/dictionary.h"
28 #include "data/variable.h"
29 #include "language/lexer/lexer.h"
30 #include "libpspp/assertion.h"
31 #include "libpspp/cast.h"
32 #include "libpspp/hash-functions.h"
33 #include "libpspp/i18n.h"
34 #include "libpspp/hmapx.h"
35 #include "libpspp/message.h"
36 #include "libpspp/misc.h"
37 #include "libpspp/pool.h"
38 #include "libpspp/str.h"
39 #include "libpspp/stringi-set.h"
41 #include "math/interaction.h"
43 #include "gl/c-ctype.h"
44 #include "gl/xalloc.h"
47 #define _(msgid) gettext (msgid)
/* Forward declarations of the variable-set accessors that are used
   before their definitions later in this file. */
static struct variable *var_set_get_var (const struct var_set *vs,
                                         size_t idx);
static struct variable *var_set_lookup_var (const struct var_set *vs,
                                            const char *name);
static bool var_set_lookup_var_idx (const struct var_set *vs,
                                    const char *name, size_t *idx);
static bool var_set_get_names_must_be_ids (const struct var_set *vs);
57 is_name_token (const struct lexer *lexer, bool names_must_be_ids)
59 return (lex_token (lexer) == T_ID
60 || (!names_must_be_ids && lex_token (lexer) == T_STRING));
/* Returns true if the current token in LEXER can serve as a variable
   name under the naming rule recorded in VS. */
static bool
is_vs_name_token (const struct lexer *lexer, const struct var_set *vs)
{
  bool ids_only = var_set_get_names_must_be_ids (vs);

  return is_name_token (lexer, ids_only);
}
/* Returns true if the current token in LEXER can serve as a variable
   name under the naming rule reported by dictionary D. */
static bool
is_dict_name_token (const struct lexer *lexer, const struct dictionary *d)
{
  bool ids_only = dict_get_names_must_be_ids (d);

  return is_name_token (lexer, ids_only);
}
75 /* Parses a name as a variable within VS. Sets *IDX to the
76 variable's index and returns true if successful. On failure
77 emits an error message and returns false. */
79 parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs,
84 if (!is_vs_name_token (lexer, vs))
86 lex_error (lexer, _("expecting variable name"));
89 else if (var_set_lookup_var_idx (vs, lex_tokcstr (lexer), idx))
96 msg (SE, _("%s is not a variable name."), lex_tokcstr (lexer));
/* Tries to parse the current token as the name of a variable in VS.
   Returns the variable on success.  On failure, an error message has
   been emitted and a null pointer is returned. */
static struct variable *
parse_vs_variable (struct lexer *lexer, const struct var_set *vs)
{
  size_t idx;

  if (!parse_vs_variable_idx (lexer, vs, &idx))
    return NULL;

  return var_set_get_var (vs, idx);
}
/* Tries to parse the current token as the name of a variable in
   dictionary D.  Returns the variable on success.  On failure, an
   error message has been emitted and a null pointer is returned. */
struct variable *
parse_variable (struct lexer *lexer, const struct dictionary *d)
{
  /* Wrap D in a temporary variable set so that the generic parser
     can be reused. */
  struct var_set *dict_vs = var_set_create_from_dict (d);
  struct variable *result = parse_vs_variable (lexer, dict_vs);

  var_set_destroy (dict_vs);
  return result;
}
/* Parses a list of variables from dictionary D, as controlled by the
   PV_* flags in OPTS.  Stores the resulting array of variables in
   *VAR and its length in *CNT.  Returns true only if successful. */
bool
parse_variables (struct lexer *lexer, const struct dictionary *d,
                 struct variable ***var,
                 size_t *cnt, int opts)
{
  struct var_set *dict_vs;
  bool ok;

  assert (d != NULL);
  assert (var != NULL);
  assert (cnt != NULL);

  /* Wrap D in a temporary variable set and let the generic parser
     do the work. */
  dict_vs = var_set_create_from_dict (d);
  ok = parse_var_set_vars (lexer, dict_vs, var, cnt, opts);
  var_set_destroy (dict_vs);

  return ok;
}
145 /* Parses a set of variables from dictionary D given options
146 OPTS. Resulting list of variables stored in *VARS and the
147 number of variables into *VAR_CNT. Returns true only if
148 successful. Same behavior as parse_variables, except that all
149 allocations are taken from the given POOL. */
151 parse_variables_pool (struct lexer *lexer, struct pool *pool,
152 const struct dictionary *dict,
153 struct variable ***vars, size_t *var_cnt, int opts)
157 /* PV_APPEND is unsafe because parse_variables would free the
158 existing names on failure, but those names are presumably
159 already in the pool, which would attempt to re-free it
161 assert (!(opts & PV_APPEND));
163 retval = parse_variables (lexer, dict, vars, var_cnt, opts);
165 pool_register (pool, free, *vars);
169 /* Parses a variable name from VS. If successful, sets *IDX to
170 the variable's index in VS, *CLASS to the variable's
171 dictionary class, and returns true. Returns false on
174 parse_var_idx_class (struct lexer *lexer, const struct var_set *vs,
176 enum dict_class *class)
178 if (!parse_vs_variable_idx (lexer, vs, idx))
181 *class = dict_class_from_id (var_get_name (var_set_get_var (vs, *idx)));
185 /* Add the variable from VS with index IDX to the list of
186 variables V that has *NV elements and room for *MV.
187 Uses and updates INCLUDED to avoid duplicates if indicated by
188 PV_OPTS, which also affects what variables are allowed in
191 add_variable (struct variable ***v, size_t *nv, size_t *mv,
192 char *included, int pv_opts,
193 const struct var_set *vs, size_t idx)
195 struct variable *add = var_set_get_var (vs, idx);
196 const char *add_name = var_get_name (add);
198 if ((pv_opts & PV_NUMERIC) && !var_is_numeric (add))
199 msg (SW, _("%s is not a numeric variable. It will not be "
200 "included in the variable list."), add_name);
201 else if ((pv_opts & PV_STRING) && !var_is_alpha (add))
202 msg (SE, _("%s is not a string variable. It will not be "
203 "included in the variable list."), add_name);
204 else if ((pv_opts & PV_NO_SCRATCH)
205 && dict_class_from_id (add_name) == DC_SCRATCH)
206 msg (SE, _("Scratch variables (such as %s) are not allowed "
208 else if ((pv_opts & (PV_SAME_TYPE | PV_SAME_WIDTH)) && *nv
209 && var_get_type (add) != var_get_type ((*v)[0]))
210 msg (SE, _("%s and %s are not the same type. All variables in "
211 "this variable list must be of the same type. %s "
212 "will be omitted from the list."),
213 var_get_name ((*v)[0]), add_name, add_name);
214 else if ((pv_opts & PV_SAME_WIDTH) && *nv
215 && var_get_width (add) != var_get_width ((*v)[0]))
216 msg (SE, _("%s and %s are string variables with different widths. "
217 "All variables in this variable list must have the "
218 "same width. %s will be omitted from the list."),
219 var_get_name ((*v)[0]), add_name, add_name);
220 else if ((pv_opts & PV_NO_DUPLICATE) && included && included[idx])
221 msg (SE, _("Variable %s appears twice in variable list."), add_name);
222 else if ((pv_opts & PV_DUPLICATE) || !included || !included[idx])
227 *v = xnrealloc (*v, *mv, sizeof **v);
230 if (included != NULL)
235 /* Adds the variables in VS with indexes FIRST_IDX through
236 LAST_IDX, inclusive, to the list of variables V that has *NV
237 elements and room for *MV. Uses and updates INCLUDED to avoid
238 duplicates if indicated by PV_OPTS, which also affects what
239 variables are allowed in appropriate ways. */
241 add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included,
243 const struct var_set *vs, int first_idx, int last_idx,
244 enum dict_class class)
248 for (i = first_idx; i <= last_idx; i++)
249 if (dict_class_from_id (var_get_name (var_set_get_var (vs, i))) == class)
250 add_variable (v, nv, mv, included, pv_opts, vs, i);
253 /* Note that if parse_variables() returns false, *v is free()'d.
254 Conversely, if parse_variables() returns true, then *nv is
255 nonzero and *v is non-NULL. */
257 parse_var_set_vars (struct lexer *lexer, const struct var_set *vs,
258 struct variable ***v, size_t *nv,
268 /* At most one of PV_NUMERIC, PV_STRING, PV_SAME_TYPE,
269 PV_SAME_WIDTH may be specified. */
270 assert (((pv_opts & PV_NUMERIC) != 0)
271 + ((pv_opts & PV_STRING) != 0)
272 + ((pv_opts & PV_SAME_TYPE) != 0)
273 + ((pv_opts & PV_SAME_WIDTH) != 0) <= 1);
275 /* PV_DUPLICATE and PV_NO_DUPLICATE are incompatible. */
276 assert (!(pv_opts & PV_DUPLICATE) || !(pv_opts & PV_NO_DUPLICATE));
278 if (!(pv_opts & PV_APPEND))
287 if (!(pv_opts & PV_DUPLICATE))
291 included = xcalloc (var_set_get_cnt (vs), sizeof *included);
292 for (i = 0; i < *nv; i++)
295 if (!var_set_lookup_var_idx (vs, var_get_name ((*v)[i]), &index))
305 if (lex_match (lexer, T_ALL))
306 add_variables (v, nv, &mv, included, pv_opts,
307 vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY);
310 enum dict_class class;
313 if (!parse_var_idx_class (lexer, vs, &first_idx, &class))
316 if (!lex_match (lexer, T_TO))
317 add_variable (v, nv, &mv, included, pv_opts, vs, first_idx);
321 enum dict_class last_class;
322 struct variable *first_var, *last_var;
324 if (!parse_var_idx_class (lexer, vs, &last_idx, &last_class))
327 first_var = var_set_get_var (vs, first_idx);
328 last_var = var_set_get_var (vs, last_idx);
330 if (last_idx < first_idx)
332 const char *first_name = var_get_name (first_var);
333 const char *last_name = var_get_name (last_var);
334 msg (SE, _("%s TO %s is not valid syntax since %s "
335 "precedes %s in the dictionary."),
336 first_name, last_name, first_name, last_name);
340 if (class != last_class)
342 msg (SE, _("When using the TO keyword to specify several "
343 "variables, both variables must be from "
344 "the same variable dictionaries, of either "
345 "ordinary, scratch, or system variables. "
346 "%s is a %s variable, whereas %s is %s."),
347 var_get_name (first_var), dict_class_to_name (class),
348 var_get_name (last_var),
349 dict_class_to_name (last_class));
353 add_variables (v, nv, &mv, included, pv_opts,
354 vs, first_idx, last_idx, class);
358 if (pv_opts & PV_SINGLE)
360 lex_match (lexer, T_COMMA);
362 while (lex_token (lexer) == T_ALL
363 || (is_vs_name_token (lexer, vs)
364 && var_set_lookup_var (vs, lex_tokcstr (lexer)) != NULL));
/* Parses the current token as a variable name that is acceptable as
   an identifier for dictionary D; the variable need not exist.

   On success, advances past the token and returns a copy of the name
   that the caller must free.  On failure, returns a null pointer.
   An error is reported here if the token is not a name token; if
   dict_id_is_valid() rejects the name, any message comes from that
   function. */
char *
parse_DATA_LIST_var (struct lexer *lexer, const struct dictionary *d)
{
  char *name;

  if (!is_dict_name_token (lexer, d))
    {
      /* Marked for translation, like the same message in
         parse_vs_variable_idx(). */
      lex_error (lexer, _("expecting variable name"));
      return NULL;
    }
  if (!dict_id_is_valid (d, lex_tokcstr (lexer), true))
    return NULL;

  name = xstrdup (lex_tokcstr (lexer));
  lex_get (lexer);
  return name;
}
396 /* Attempts to break UTF-8 encoded NAME into a root (whose contents are
397 arbitrary except that it does not end in a digit) followed by an integer
398 numeric suffix. On success, stores the value of the suffix into *NUMBERP,
399 the number of digits in the suffix into *N_DIGITSP, and returns the number
400 of bytes in the root. On failure, returns 0. */
402 extract_numeric_suffix (const char *name,
403 unsigned long int *numberp, int *n_digitsp)
405 size_t root_len, n_digits;
408 /* Count length of root. */
409 root_len = 1; /* Valid identifier never starts with digit. */
410 for (i = 1; name[i] != '\0'; i++)
411 if (!c_isdigit (name[i]))
413 n_digits = i - root_len;
417 msg (SE, _("`%s' cannot be used with TO because it does not end in "
422 *numberp = strtoull (name + root_len, NULL, 10);
423 if (*numberp == ULONG_MAX)
425 msg (SE, _("Numeric suffix on `%s' is larger than supported with TO."),
429 *n_digitsp = n_digits;
434 add_var_name (char *name,
435 char ***names, size_t *n_vars, size_t *allocated_vars,
436 struct stringi_set *set, int pv_opts)
438 if (pv_opts & PV_NO_DUPLICATE && !stringi_set_insert (set, name))
440 msg (SE, _("Variable %s appears twice in variable list."),
445 if (*n_vars >= *allocated_vars)
446 *names = x2nrealloc (*names, allocated_vars, sizeof **names);
447 (*names)[(*n_vars)++] = name;
451 /* Parses a list of variable names according to the DATA LIST version
452 of the TO convention. */
454 parse_DATA_LIST_vars (struct lexer *lexer, const struct dictionary *dict,
455 char ***namesp, size_t *n_varsp, int pv_opts)
459 size_t allocated_vars;
461 struct stringi_set set;
467 assert ((pv_opts & ~(PV_APPEND | PV_SINGLE
468 | PV_NO_SCRATCH | PV_NO_DUPLICATE)) == 0);
469 stringi_set_init (&set);
471 if (pv_opts & PV_APPEND)
473 n_vars = allocated_vars = *n_varsp;
476 if (pv_opts & PV_NO_DUPLICATE)
480 for (i = 0; i < n_vars; i++)
481 stringi_set_insert (&set, names[i]);
486 n_vars = allocated_vars = 0;
492 name1 = parse_DATA_LIST_var (lexer, dict);
495 if (dict_class_from_id (name1) == DC_SCRATCH && pv_opts & PV_NO_SCRATCH)
497 msg (SE, _("Scratch variables not allowed here."));
500 if (lex_match (lexer, T_TO))
502 unsigned long int num1, num2;
503 int n_digits1, n_digits2;
504 int root_len1, root_len2;
505 unsigned long int number;
507 char *name2 = parse_DATA_LIST_var (lexer, dict);
509 root_len1 = extract_numeric_suffix (name1, &num1, &n_digits1);
513 root_len2 = extract_numeric_suffix (name2, &num2, &n_digits2);
517 if (root_len1 != root_len2 || memcasecmp (name1, name2, root_len1))
519 msg (SE, _("Prefixes don't match in use of TO convention."));
524 msg (SE, _("Bad bounds in use of TO convention."));
528 for (number = num1; number <= num2; number++)
530 char *name = xasprintf ("%.*s%0*lu",
533 if (!add_var_name (name, &names, &n_vars, &allocated_vars,
547 if (!add_var_name (name1, &names, &n_vars, &allocated_vars,
553 lex_match (lexer, T_COMMA);
555 if (pv_opts & PV_SINGLE)
558 while (lex_token (lexer) == T_ID);
562 stringi_set_destroy (&set);
571 for (i = 0; i < n_vars; i++)
/* Registers with POOL each of the NNAMES strings in NAMES, and then
   the NAMES array itself, so that the pool releases all of them with
   free(). */
static void
register_vars_pool (struct pool *pool, char **names, size_t nnames)
{
  char **p;

  for (p = names; p < names + nnames; p++)
    pool_register (pool, free, *p);
  pool_register (pool, free, names);
}
594 /* Parses a list of variable names according to the DATA LIST
595 version of the TO convention. Same args as
596 parse_DATA_LIST_vars(), except that all allocations are taken
597 from the given POOL. */
599 parse_DATA_LIST_vars_pool (struct lexer *lexer, const struct dictionary *dict,
601 char ***names, size_t *nnames, int pv_opts)
605 /* PV_APPEND is unsafe because parse_DATA_LIST_vars would free
606 the existing names on failure, but those names are
607 presumably already in the pool, which would attempt to
609 assert (!(pv_opts & PV_APPEND));
611 retval = parse_DATA_LIST_vars (lexer, dict, names, nnames, pv_opts);
613 register_vars_pool (pool, *names, *nnames);
617 /* Parses a list of variables where some of the variables may be
618 existing and the rest are to be created. Same args as
619 parse_DATA_LIST_vars(). */
621 parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict,
622 char ***names, size_t *nnames, int pv_opts)
626 assert (names != NULL);
627 assert (nnames != NULL);
629 if (!(pv_opts & PV_APPEND))
634 while (is_dict_name_token (lexer, dict) || lex_token (lexer) == T_ALL)
636 if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokcstr (lexer)) != NULL)
641 if (!parse_variables (lexer, dict, &v, &nv, pv_opts))
643 *names = xnrealloc (*names, *nnames + nv, sizeof **names);
644 for (i = 0; i < nv; i++)
645 (*names)[*nnames + i] = xstrdup (var_get_name (v[i]));
649 else if (!parse_DATA_LIST_vars (lexer, dict, names, nnames, PV_APPEND | pv_opts))
658 for (i = 0; i < *nnames; i++)
666 /* Parses a list of variables where some of the variables may be
667 existing and the rest are to be created. Same args as
668 parse_mixed_vars(), except that all allocations are taken
669 from the given POOL. */
671 parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struct pool *pool,
672 char ***names, size_t *nnames, int pv_opts)
676 /* PV_APPEND is unsafe because parse_mixed_vars_pool would free
677 the existing names on failure, but those names are
678 presumably already in the pool, which would attempt to
680 assert (!(pv_opts & PV_APPEND));
682 retval = parse_mixed_vars (lexer, dict, names, nnames, pv_opts);
684 register_vars_pool (pool, *names, *nnames);
/* A set of variables.

   This is an abstract interface: the function pointers supply the
   behavior and AUX carries whatever data the implementation needs.
   This file provides one implementation backed by a dictionary and
   one backed by an array of variables. */
struct var_set
  {
    /* If true, only identifier tokens are accepted as variable
       names; otherwise string tokens are accepted as well. */
    bool names_must_be_ids;

    /* Returns the number of variables in VS. */
    size_t (*get_cnt) (const struct var_set *vs);

    /* Returns the variable with index IDX in VS. */
    struct variable *(*get_var) (const struct var_set *vs, size_t idx);

    /* Looks up NAME in VS; if found, stores its index in *IDX and
       returns true. */
    bool (*lookup_var_idx) (const struct var_set *vs, const char *name,
                            size_t *idx);

    /* Frees VS and anything it owns. */
    void (*destroy) (struct var_set *vs);

    /* Implementation-private data. */
    void *aux;
  };
699 /* Returns the number of variables in VS. */
701 var_set_get_cnt (const struct var_set *vs)
705 return vs->get_cnt (vs);
708 /* Return variable with index IDX in VS.
709 IDX must be less than the number of variables in VS. */
710 static struct variable *
711 var_set_get_var (const struct var_set *vs, size_t idx)
714 assert (idx < var_set_get_cnt (vs));
716 return vs->get_var (vs, idx);
/* Looks up NAME in VS.  Returns the matching variable, or a null
   pointer if VS contains no variable with that name. */
static struct variable *
var_set_lookup_var (const struct var_set *vs, const char *name)
{
  size_t idx;

  if (!var_set_lookup_var_idx (vs, name, &idx))
    return NULL;

  return var_set_get_var (vs, idx);
}
730 /* If VS contains a variable named NAME, sets *IDX to its index
731 and returns true. Otherwise, returns false. */
733 var_set_lookup_var_idx (const struct var_set *vs, const char *name,
737 assert (name != NULL);
739 return vs->lookup_var_idx (vs, name, idx);
744 var_set_destroy (struct var_set *vs)
751 var_set_get_names_must_be_ids (const struct var_set *vs)
753 return vs->names_must_be_ids;
756 /* Returns the number of variables in VS. */
758 dict_var_set_get_cnt (const struct var_set *vs)
760 struct dictionary *d = vs->aux;
762 return dict_get_var_cnt (d);
765 /* Return variable with index IDX in VS.
766 IDX must be less than the number of variables in VS. */
767 static struct variable *
768 dict_var_set_get_var (const struct var_set *vs, size_t idx)
770 struct dictionary *d = vs->aux;
772 return dict_get_var (d, idx);
775 /* If VS contains a variable named NAME, sets *IDX to its index
776 and returns true. Otherwise, returns false. */
778 dict_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
781 struct dictionary *d = vs->aux;
782 struct variable *v = dict_lookup_var (d, name);
785 *idx = var_get_dict_index (v);
/* destroy implementation for a dictionary-based variable set.
   Frees VS only; the dictionary it refers to is left alone. */
static void
dict_var_set_destroy (struct var_set *vs)
{
  free (vs);
}
799 /* Returns a variable set based on D. */
801 var_set_create_from_dict (const struct dictionary *d)
803 struct var_set *vs = xmalloc (sizeof *vs);
804 vs->names_must_be_ids = dict_get_names_must_be_ids (d);
805 vs->get_cnt = dict_var_set_get_cnt;
806 vs->get_var = dict_var_set_get_var;
807 vs->lookup_var_idx = dict_var_set_lookup_var_idx;
808 vs->destroy = dict_var_set_destroy;
809 vs->aux = (void *) d;
813 /* A variable set based on an array. */
816 struct variable *const *var;/* Array of variables. */
817 size_t var_cnt; /* Number of elements in var. */
818 struct hmapx vars_by_name; /* Variables hashed by name. */
821 /* Returns the number of variables in VS. */
823 array_var_set_get_cnt (const struct var_set *vs)
825 struct array_var_set *avs = vs->aux;
830 /* Return variable with index IDX in VS.
831 IDX must be less than the number of variables in VS. */
832 static struct variable *
833 array_var_set_get_var (const struct var_set *vs, size_t idx)
835 struct array_var_set *avs = vs->aux;
837 return CONST_CAST (struct variable *, avs->var[idx]);
840 /* If VS contains a variable named NAME, sets *IDX to its index
841 and returns true. Otherwise, returns false. */
843 array_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
846 struct array_var_set *avs = vs->aux;
847 struct hmapx_node *node;
848 struct variable **varp;
850 HMAPX_FOR_EACH_WITH_HASH (varp, node, utf8_hash_case_string (name, 0),
852 if (!utf8_strcasecmp (name, var_get_name (*varp)))
854 *idx = varp - avs->var;
863 array_var_set_destroy (struct var_set *vs)
865 struct array_var_set *avs = vs->aux;
867 hmapx_destroy (&avs->vars_by_name);
872 /* Returns a variable set based on the VAR_CNT variables in VAR. */
874 var_set_create_from_array (struct variable *const *var, size_t var_cnt)
877 struct array_var_set *avs;
880 vs = xmalloc (sizeof *vs);
881 vs->names_must_be_ids = true;
882 vs->get_cnt = array_var_set_get_cnt;
883 vs->get_var = array_var_set_get_var;
884 vs->lookup_var_idx = array_var_set_lookup_var_idx;
885 vs->destroy = array_var_set_destroy;
886 vs->aux = avs = xmalloc (sizeof *avs);
888 avs->var_cnt = var_cnt;
889 hmapx_init (&avs->vars_by_name);
890 for (i = 0; i < var_cnt; i++)
892 const char *name = var_get_name (var[i]);
895 if (array_var_set_lookup_var_idx (vs, name, &idx))
897 var_set_destroy (vs);
900 hmapx_insert (&avs->vars_by_name, CONST_CAST (void *, &avs->var[i]),
901 utf8_hash_case_string (name, 0));
909 If the match succeeds, the variable will be placed in VAR.
910 Returns true if successful */
912 lex_match_variable (struct lexer *lexer, const struct dictionary *dict, const struct variable **var)
914 if (lex_token (lexer) != T_ID)
917 *var = parse_variable_const (lexer, dict);
924 /* An interaction is a variable followed by {*, BY} followed by an interaction */
926 parse_internal_interaction (struct lexer *lexer, const struct dictionary *dict, struct interaction **iact, struct interaction **it)
928 const struct variable *v = NULL;
931 switch (lex_next_token (lexer, 1))
945 if (! lex_match_variable (lexer, dict, &v))
948 interaction_destroy (*it);
956 *iact = interaction_create (v);
958 interaction_add_variable (*iact, v);
960 if ( lex_match (lexer, T_ASTERISK) || lex_match (lexer, T_BY))
962 return parse_internal_interaction (lexer, dict, iact, iact);
/* Parses an interaction (a variable, optionally followed by `*' or
   BY and further variables) from LEXER, looking variables up in
   DICT.  The variables are added to *IACT, which is created if it
   is null on entry.  Returns true if successful. */
bool
parse_design_interaction (struct lexer *lexer, const struct dictionary *dict, struct interaction **iact)
{
  /* Nothing extra needs to be destroyed if the very first variable
     fails to parse. */
  struct interaction **destroy_on_failure = NULL;

  return parse_internal_interaction (lexer, dict, iact, destroy_on_failure);
}