1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/lexer/variable-parser.h"
25 #include "data/dictionary.h"
26 #include "data/procedure.h"
27 #include "data/variable.h"
28 #include "language/lexer/lexer.h"
29 #include "libpspp/assertion.h"
30 #include "libpspp/cast.h"
31 #include "libpspp/hash-functions.h"
32 #include "libpspp/hmapx.h"
33 #include "libpspp/message.h"
34 #include "libpspp/misc.h"
35 #include "libpspp/pool.h"
36 #include "libpspp/str.h"
37 #include "libpspp/stringi-set.h"
39 #include "gl/xalloc.h"
/* Marks MSGID as a translatable string by passing it through
   gettext(). */
42 #define _(msgid) gettext (msgid)
/* Forward declarations of var_set helpers that the parsing code
   uses before their definitions appear later in the file. */
44 static struct variable * var_set_get_var (const struct var_set *, size_t );
46 static struct variable *var_set_lookup_var (const struct var_set *,
49 static bool var_set_lookup_var_idx (const struct var_set *, const char *,
54 /* Parses the current token as the name of a variable within VS.
   On success, stores the variable's index in *IDX and returns
   true.  On failure, emits an error message and returns false. */
58 parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs,
/* Anything other than an identifier token cannot be a variable
   name. */
63 if (lex_token (lexer) != T_ID)
65 lex_error (lexer, _("expecting variable name"));
/* The identifier is accepted only if VS contains a variable by
   that name; the lookup fills in *IDX. */
68 else if (var_set_lookup_var_idx (vs, lex_tokid (lexer), idx))
/* An identifier that VS does not contain is reported by name. */
75 msg (SE, _("%s is not a variable name."), lex_tokid (lexer));
80 /* Parses a name as a variable within VS and returns the variable
   if successful.  On failure emits an error message and returns
   a null pointer. */
83 static struct variable *
84 parse_vs_variable (struct lexer *lexer, const struct var_set *vs)
/* Resolve the name to an index first, then map the index to the
   variable; a failed parse yields NULL. */
87 return parse_vs_variable_idx (lexer, vs, &idx) ? var_set_get_var (vs, idx) : NULL;
90 /* Parses the current token as the name of a variable in
   dictionary D.  Returns the variable if successful.  On failure
   emits an error message and returns a null pointer. */
94 parse_variable (struct lexer *lexer, const struct dictionary *d)
/* Wrap D in a var_set so that the var_set-based parser can be
   reused for a plain dictionary. */
96 struct var_set *vs = var_set_create_from_dict (d);
97 struct variable *var = parse_vs_variable (lexer, vs);
102 /* Parses a set of variables from dictionary D given options
   OPTS.  Resulting list of variables stored in *VAR and the
   number of variables into *CNT.  Returns true only if
   successful. */
107 parse_variables (struct lexer *lexer, const struct dictionary *d,
108 struct variable ***var,
109 size_t *cnt, int opts)
/* Both output pointers are required. */
115 assert (var != NULL);
116 assert (cnt != NULL);
/* Delegate to the var_set parser through a temporary var_set
   built from D, which is destroyed once parsing is done. */
118 vs = var_set_create_from_dict (d);
119 success = parse_var_set_vars (lexer, vs, var, cnt, opts);
120 var_set_destroy (vs);
124 /* Parses a set of variables from dictionary DICT given options
   OPTS, by calling parse_variables.  The resulting list of
   variables is stored in *VARS and the number of variables in
   *VAR_CNT.  The list is handed to POOL through pool_register
   with free() as its release function.  PV_APPEND must not be
   set in OPTS.  Returns true only if successful. */
130 parse_variables_pool (struct lexer *lexer, struct pool *pool,
131 const struct dictionary *dict,
132 struct variable ***vars, size_t *var_cnt, int opts)
136 /* PV_APPEND is unsafe because parse_variables would free the
137 existing names on failure, but those names are presumably
138 already in the pool, which would attempt to re-free it
140 assert (!(opts & PV_APPEND));
142 retval = parse_variables (lexer, dict, vars, var_cnt, opts);
144 pool_register (pool, free, *vars);
148 /* Parses a variable name from VS.  If successful, sets *IDX to
   the variable's index in VS, *CLASS to the variable's
   dictionary class, and returns true.  Returns false on
   failure. */
153 parse_var_idx_class (struct lexer *lexer, const struct var_set *vs,
155 enum dict_class *class)
/* Without a valid variable index there is no class to report. */
157 if (!parse_vs_variable_idx (lexer, vs, idx))
/* The dictionary class is derived from the variable's name. */
160 *class = dict_class_from_id (var_get_name (var_set_get_var (vs, *idx)));
164 /* Add the variable from VS with index IDX to the list of
   variables V that has *NV elements and room for *MV.
   Uses and updates INCLUDED to avoid duplicates if indicated by
   PV_OPTS, which also affects what variables are allowed in
   appropriate ways. */
170 add_variable (struct variable ***v, size_t *nv, size_t *mv,
171 char *included, int pv_opts,
172 const struct var_set *vs, size_t idx)
174 struct variable *add = var_set_get_var (vs, idx);
175 const char *add_name = var_get_name (add);
/* Each test in the chain below rejects ADD with a message; only
   the final branch goes on to extend the list.
   NOTE(review): the numeric-type rejection is reported with SW
   while every other rejection uses SE -- confirm the difference
   is intended. */
177 if ((pv_opts & PV_NUMERIC) && !var_is_numeric (add))
178 msg (SW, _("%s is not a numeric variable. It will not be "
179 "included in the variable list."), add_name);
180 else if ((pv_opts & PV_STRING) && !var_is_alpha (add))
181 msg (SE, _("%s is not a string variable. It will not be "
182 "included in the variable list."), add_name);
/* Scratch status is determined from the variable's name. */
183 else if ((pv_opts & PV_NO_SCRATCH)
184 && dict_class_from_id (add_name) == DC_SCRATCH)
185 msg (SE, _("Scratch variables (such as %s) are not allowed "
/* With a nonempty list, the type of ADD is compared against the
   first variable already in the list. */
187 else if ((pv_opts & (PV_SAME_TYPE | PV_SAME_WIDTH)) && *nv
188 && var_get_type (add) != var_get_type ((*v)[0]))
189 msg (SE, _("%s and %s are not the same type. All variables in "
190 "this variable list must be of the same type. %s "
191 "will be omitted from the list."),
192 var_get_name ((*v)[0]), add_name, add_name);
/* Likewise the width of ADD is compared against the first
   variable already in the list. */
193 else if ((pv_opts & PV_SAME_WIDTH) && *nv
194 && var_get_width (add) != var_get_width ((*v)[0]))
195 msg (SE, _("%s and %s are string variables with different widths. "
196 "All variables in this variable list must have the "
197 "same width. %s will be omitted from the list."),
198 var_get_name ((*v)[0]), add_name, add_name);
/* INCLUDED[IDX] is nonzero when the variable is already listed. */
199 else if ((pv_opts & PV_NO_DUPLICATE) && included[idx])
200 msg (SE, _("Variable %s appears twice in variable list."), add_name);
/* PV_DUPLICATE is tested first, so INCLUDED is not read when
   duplicates are allowed. */
201 else if ((pv_opts & PV_DUPLICATE) || !included[idx])
/* Resize the list so that it can hold *MV pointers. */
206 *v = xnrealloc (*v, *mv, sizeof **v);
/* INCLUDED may be null, so it is checked before being updated. */
209 if (included != NULL)
214 /* Adds the variables in VS with indexes FIRST_IDX through
   LAST_IDX, inclusive, whose dictionary class is CLASS, to the
   list of variables V that has *NV elements and room for *MV.
   INCLUDED and PV_OPTS are passed through to add_variable, which
   uses them to avoid duplicates and to decide which variables
   are allowed. */
220 add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included,
222 const struct var_set *vs, int first_idx, int last_idx,
223 enum dict_class class)
/* Walk the inclusive index range, skipping any variable whose
   name-derived class differs from CLASS. */
227 for (i = first_idx; i <= last_idx; i++)
228 if (dict_class_from_id (var_get_name (var_set_get_var (vs, i))) == class)
229 add_variable (v, nv, mv, included, pv_opts, vs, i);
232 /* Parses a list of variables from VS according to PV_OPTS,
   building the list in *V and its length in *NV.
   Note that if parse_variables() returns false, *v is free()'d.
   Conversely, if parse_variables() returns true, then *nv is
   nonzero and *v is non-NULL. */
236 parse_var_set_vars (struct lexer *lexer, const struct var_set *vs,
237 struct variable ***v, size_t *nv,
247 /* At most one of PV_NUMERIC, PV_STRING, PV_SAME_TYPE,
   PV_SAME_WIDTH may be specified. */
249 assert (((pv_opts & PV_NUMERIC) != 0)
250 + ((pv_opts & PV_STRING) != 0)
251 + ((pv_opts & PV_SAME_TYPE) != 0)
252 + ((pv_opts & PV_SAME_WIDTH) != 0) <= 1);
254 /* PV_DUPLICATE and PV_NO_DUPLICATE are incompatible. */
255 assert (!(pv_opts & PV_DUPLICATE) || !(pv_opts & PV_NO_DUPLICATE));
257 if (!(pv_opts & PV_APPEND))
/* Unless duplicates are allowed, keep one zero-initialized flag
   per variable in VS, and look up the VS index of every variable
   that is already in the list. */
266 if (!(pv_opts & PV_DUPLICATE))
270 included = xcalloc (var_set_get_cnt (vs), sizeof *included);
271 for (i = 0; i < *nv; i++)
274 if (!var_set_lookup_var_idx (vs, var_get_name ((*v)[i]), &index))
/* ALL expands to every variable in VS of the ordinary class. */
284 if (lex_match (lexer, T_ALL))
285 add_variables (v, nv, &mv, included, pv_opts,
286 vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY);
289 enum dict_class class;
/* Otherwise a variable name is expected.  If TO does not follow,
   that single variable is added. */
292 if (!parse_var_idx_class (lexer, vs, &first_idx, &class))
295 if (!lex_match (lexer, T_TO))
296 add_variable (v, nv, &mv, included, pv_opts, vs, first_idx);
300 enum dict_class last_class;
301 struct variable *first_var, *last_var;
/* After TO, a second variable name closes the range. */
303 if (!parse_var_idx_class (lexer, vs, &last_idx, &last_class))
306 first_var = var_set_get_var (vs, first_idx);
307 last_var = var_set_get_var (vs, last_idx);
/* The range must run forward in index order. */
309 if (last_idx < first_idx)
311 const char *first_name = var_get_name (first_var);
312 const char *last_name = var_get_name (last_var);
313 msg (SE, _("%s TO %s is not valid syntax since %s "
314 "precedes %s in the dictionary."),
315 first_name, last_name, first_name, last_name);
/* Both ends of the range must have the same dictionary class. */
319 if (class != last_class)
321 msg (SE, _("When using the TO keyword to specify several "
322 "variables, both variables must be from "
323 "the same variable dictionaries, of either "
324 "ordinary, scratch, or system variables. "
325 "%s is a %s variable, whereas %s is %s."),
326 var_get_name (first_var), dict_class_to_name (class),
327 var_get_name (last_var),
328 dict_class_to_name (last_class));
/* Add every variable of that class in the inclusive range. */
332 add_variables (v, nv, &mv, included, pv_opts,
333 vs, first_idx, last_idx, class);
337 if (pv_opts & PV_SINGLE)
/* A comma between list items is optional: the result of the
   match is ignored. */
339 lex_match (lexer, ',');
/* Keep going while the next token is ALL or an identifier that
   names a variable in VS. */
341 while (lex_token (lexer) == T_ALL
342 || (lex_token (lexer) == T_ID && var_set_lookup_var (vs, lex_tokid (lexer)) != NULL));
358 /* Extracts a numeric suffix from variable name S, copying it
   into string R.  Sets *D to the length of R and *N to its
   value. */
362 extract_num (char *s, char *r, int *n, int *d)
366 /* Find first digit, starting from the last character of S and
   never moving before the start of S. */
367 cp = s + strlen (s) - 1;
368 while (isdigit ((unsigned char) *cp) && cp > s)
/* Copy the part of S that precedes CP into R.
   NOTE(review): strncpy does not NUL-terminate R here -- confirm
   that R is terminated before it is used. */
373 strncpy (r, s, cp - s);
376 /* Count initial zeros. */
/* Accumulate the decimal value of the digits into *N. */
385 while (isdigit ((unsigned char) *cp))
388 *n = (*n * 10) + (*cp - '0');
/* Having both *N and *D equal to zero is reported as a misuse of
   the TO convention. */
393 if (*n == 0 && *d == 0)
395 msg (SE, _("incorrect use of TO convention"));
401 /* Parses a list of variable names according to the DATA LIST version
   of the TO convention.  Names are stored as newly allocated
   strings in *NAMES, which is resized as needed.  PV_OPTS may
   contain only PV_APPEND, PV_SINGLE, PV_NO_SCRATCH, and
   PV_NO_DUPLICATE. */
404 parse_DATA_LIST_vars (struct lexer *lexer, char ***names,
405 size_t *nnames, int pv_opts)
/* NOTE(review): these buffers are filled with strcpy and sprintf
   below, which assumes identifiers never exceed VAR_NAME_LEN
   bytes -- confirm the lexer guarantees that. */
411 char name1[VAR_NAME_LEN + 1], name2[VAR_NAME_LEN + 1];
412 char root1[VAR_NAME_LEN + 1], root2[VAR_NAME_LEN + 1];
413 struct stringi_set set;
416 assert (names != NULL);
417 assert (nnames != NULL);
/* No options other than these four are accepted. */
418 assert ((pv_opts & ~(PV_APPEND | PV_SINGLE
419 | PV_NO_SCRATCH | PV_NO_DUPLICATE)) == 0);
420 stringi_set_init (&set);
/* When appending, start from the names the caller already has. */
422 if (pv_opts & PV_APPEND)
424 nvar = mvar = *nnames;
/* Seed the duplicate-detection set with the existing names. */
426 if (pv_opts & PV_NO_DUPLICATE)
430 for (i = 0; i < nvar; i++)
431 stringi_set_insert (&set, (*names)[i]);
/* NOTE(review): this message and the one after TO are not wrapped
   in _(), unlike the same message in parse_vs_variable_idx --
   confirm whether they should be translatable. */
442 if (lex_token (lexer) != T_ID)
444 lex_error (lexer, "expecting variable name");
447 if (dict_class_from_id (lex_tokid (lexer)) == DC_SCRATCH
448 && (pv_opts & PV_NO_SCRATCH))
450 msg (SE, _("Scratch variables not allowed here."));
453 strcpy (name1, lex_tokid (lexer));
/* NAME1 TO NAME2 denotes a numbered range of names. */
455 if (lex_token (lexer) == T_TO)
458 if (lex_token (lexer) != T_ID)
460 lex_error (lexer, "expecting variable name");
463 strcpy (name2, lex_tokid (lexer));
/* Split both names with extract_num, which fills in a root
   string and two integers for each. */
466 if (!extract_num (name1, root1, &n1, &d1)
467 || !extract_num (name2, root2, &n2, &d2))
/* The two roots must match, ignoring case. */
470 if (strcasecmp (root1, root2))
472 msg (SE, _("Prefixes don't match in use of TO convention."));
477 msg (SE, _("Bad bounds in use of TO convention."));
/* Grow *NAMES so that the whole range N1...N2 fits. */
483 if (mvar < nvar + (n2 - n1 + 1))
485 mvar += ROUND_UP (n2 - n1 + 1, 16);
486 *names = xnrealloc (*names, mvar, sizeof **names);
/* Generate each name as ROOT1 followed by the number, zero-padded
   to a width of D1. */
489 for (n = n1; n <= n2; n++)
491 char name[VAR_NAME_LEN + 1];
492 sprintf (name, "%s%0*d", root1, d1, n);
/* A failed insertion into SET is reported as a duplicate name. */
494 if (pv_opts & PV_NO_DUPLICATE && !stringi_set_insert (&set, name))
496 msg (SE, _("Variable %s appears twice in variable list."),
500 (*names)[nvar] = xstrdup (name);
/* Without TO, NAME1 alone is appended. */
509 *names = xnrealloc (*names, mvar, sizeof **names);
511 (*names)[nvar++] = xstrdup (name1);
/* A comma between list items is optional. */
514 lex_match (lexer, ',');
516 if (pv_opts & PV_SINGLE)
/* Keep going while the next token is an identifier. */
519 while (lex_token (lexer) == T_ID);
524 stringi_set_destroy (&set);
528 for (i = 0; i < nvar; i++)
537 /* Registers each of the NAMES[0...NNAMES - 1] in POOL, as well
   as NAMES itself. */
540 register_vars_pool (struct pool *pool, char **names, size_t nnames)
/* Hand each string to POOL with free() as its release function... */
544 for (i = 0; i < nnames; i++)
545 pool_register (pool, free, names[i]);
/* ...and then the array that holds the strings. */
546 pool_register (pool, free, names);
549 /* Parses a list of variable names according to the DATA LIST
   version of the TO convention, by calling
   parse_DATA_LIST_vars() with the same LEXER, NAMES, NNAMES and
   PV_OPTS.  The resulting names and the array holding them are
   handed to POOL through register_vars_pool().  PV_APPEND must
   not be set in PV_OPTS. */
554 parse_DATA_LIST_vars_pool (struct lexer *lexer, struct pool *pool,
555 char ***names, size_t *nnames, int pv_opts)
559 /* PV_APPEND is unsafe because parse_DATA_LIST_vars would free
560 the existing names on failure, but those names are
561 presumably already in the pool, which would attempt to
563 assert (!(pv_opts & PV_APPEND));
565 retval = parse_DATA_LIST_vars (lexer, names, nnames, pv_opts);
567 register_vars_pool (pool, *names, *nnames);
571 /* Parses a list of variables where some of the variables may be
   existing in DICT and the rest are to be created.  Names are
   stored as newly allocated strings in *NAMES and counted in
   *NNAMES.  The only option accepted in PV_OPTS is PV_APPEND. */
575 parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict,
576 char ***names, size_t *nnames, int pv_opts)
580 assert (names != NULL);
581 assert (nnames != NULL);
582 assert ((pv_opts & ~PV_APPEND) == 0);
584 if (!(pv_opts & PV_APPEND))
589 while (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL)
/* ALL, or a name that DICT already contains, is parsed as a list
   of existing variables whose names are then copied into
   *NAMES. */
591 if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
596 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
598 *names = xnrealloc (*names, *nnames + nv, sizeof **names);
599 for (i = 0; i < nv; i++)
600 (*names)[*nnames + i] = xstrdup (var_get_name (v[i]));
/* Any other identifier is handled by the DATA LIST name parser,
   appending to what has been collected so far. */
604 else if (!parse_DATA_LIST_vars (lexer, names, nnames, PV_APPEND))
610 for (i = 0; i < *nnames; i++)
618 /* Parses a list of variables where some of the variables may be
   existing and the rest are to be created, by calling
   parse_mixed_vars() with the same LEXER, DICT, NAMES, NNAMES
   and PV_OPTS.  The resulting names and the array holding them
   are handed to POOL through register_vars_pool().  PV_APPEND
   must not be set in PV_OPTS. */
623 parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struct pool *pool,
624 char ***names, size_t *nnames, int pv_opts)
628 /* PV_APPEND is unsafe because parse_mixed_vars_pool would free
629 the existing names on failure, but those names are
630 presumably already in the pool, which would attempt to
632 assert (!(pv_opts & PV_APPEND));
634 retval = parse_mixed_vars (lexer, dict, names, nnames, pv_opts);
636 register_vars_pool (pool, *names, *nnames);
640 /* A set of variables, accessed through the function pointers
   below so that different kinds of sets share one interface. */
/* Returns the number of variables in the set. */
643 size_t (*get_cnt) (const struct var_set *);
/* Returns the variable at index IDX. */
644 struct variable *(*get_var) (const struct var_set *, size_t idx);
/* Looks up a variable by name and reports its index. */
645 bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *);
/* Releases the set. */
646 void (*destroy) (struct var_set *);
650 /* Returns the number of variables in VS, as reported by the
   set's get_cnt function. */
652 var_set_get_cnt (const struct var_set *vs)
656 return vs->get_cnt (vs);
659 /* Returns the variable with index IDX in VS, as supplied by the
   set's get_var function.
   IDX must be less than the number of variables in VS. */
661 static struct variable *
662 var_set_get_var (const struct var_set *vs, size_t idx)
/* Enforce the documented bound on IDX. */
665 assert (idx < var_set_get_cnt (vs));
667 return vs->get_var (vs, idx);
670 /* Returns the variable in VS named NAME, or a null pointer if VS
   contains no variable with that name. */
673 var_set_lookup_var (const struct var_set *vs, const char *name)
/* Look up the index by name, then convert the index into the
   variable. */
676 return (var_set_lookup_var_idx (vs, name, &idx)
677 ? var_set_get_var (vs, idx)
681 /* If VS contains a variable named NAME, sets *IDX to its index
   and returns true.  Otherwise, returns false.  The lookup
   itself is done by the set's lookup_var_idx function. */
684 var_set_lookup_var_idx (const struct var_set *vs, const char *name,
/* NAME is required and must not be longer than VAR_NAME_LEN. */
688 assert (name != NULL);
689 assert (strlen (name) <= VAR_NAME_LEN);
691 return vs->lookup_var_idx (vs, name, idx);
/* Destroys VS.
   NOTE(review): presumably dispatches through vs->destroy --
   confirm against the function body. */
696 var_set_destroy (struct var_set *vs)
702 /* Returns the number of variables in VS, which is the variable
   count of the dictionary stored in VS's aux pointer. */
704 dict_var_set_get_cnt (const struct var_set *vs)
706 struct dictionary *d = vs->aux;
708 return dict_get_var_cnt (d);
711 /* Returns the variable with index IDX in VS, taken from the
   dictionary stored in VS's aux pointer.
   IDX must be less than the number of variables in VS. */
713 static struct variable *
714 dict_var_set_get_var (const struct var_set *vs, size_t idx)
716 struct dictionary *d = vs->aux;
718 return dict_get_var (d, idx);
721 /* If VS contains a variable named NAME, sets *IDX to its index
   and returns true.  Otherwise, returns false. */
724 dict_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
/* The lookup is done in the dictionary stored in VS's aux
   pointer. */
727 struct dictionary *d = vs->aux;
728 struct variable *v = dict_lookup_var (d, name);
/* The reported index is the variable's dictionary index. */
731 *idx = var_get_dict_index (v);
/* Destroy callback installed by var_set_create_from_dict for
   dictionary-based sets. */
740 dict_var_set_destroy (struct var_set *vs)
745 /* Returns a newly allocated variable set based on D.  The set
   keeps a pointer to D rather than copying it. */
747 var_set_create_from_dict (const struct dictionary *d)
749 struct var_set *vs = xmalloc (sizeof *vs);
/* Install the dictionary-backed implementations of the var_set
   operations. */
750 vs->get_cnt = dict_var_set_get_cnt;
751 vs->get_var = dict_var_set_get_var;
752 vs->lookup_var_idx = dict_var_set_lookup_var_idx;
753 vs->destroy = dict_var_set_destroy;
/* The cast drops const because aux is read back as a non-const
   dictionary pointer by the callbacks. */
754 vs->aux = (void *) d;
758 /* A variable set based on an array.  The hash table entries
   point at elements of VAR, so an element's position in the
   array gives the variable's index. */
761 struct variable *const *var;/* Array of variables. */
762 size_t var_cnt; /* Number of elements in var. */
763 struct hmapx vars_by_name; /* Variables hashed by name. */
766 /* Returns the number of variables in VS, an array-based set
   whose state is stored in VS's aux pointer. */
768 array_var_set_get_cnt (const struct var_set *vs)
770 struct array_var_set *avs = vs->aux;
775 /* Returns the variable with index IDX in VS, read directly from
   the underlying array.
   IDX must be less than the number of variables in VS. */
777 static struct variable *
778 array_var_set_get_var (const struct var_set *vs, size_t idx)
780 struct array_var_set *avs = vs->aux;
/* The array elements are const-qualified, so the qualifier is
   cast away to match the return type. */
782 return CONST_CAST (struct variable *, avs->var[idx]);
785 /* If VS contains a variable named NAME, sets *IDX to its index
   and returns true.  Otherwise, returns false.  Names are
   compared without regard to case. */
788 array_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
791 struct array_var_set *avs = vs->aux;
792 struct hmapx_node *node;
793 struct variable **varp;
/* Visit only the entries whose hash equals NAME's
   case-insensitive hash. */
795 HMAPX_FOR_EACH_WITH_HASH (varp, node, hash_case_string (name, 0),
797 if (!strcasecmp (name, var_get_name (*varp)))
/* VARP points into the variable array, so its offset from the
   start of the array is the variable's index. */
799 *idx = varp - avs->var;
/* Destroy callback for array-based sets: releases the
   name-to-variable hash table held in VS's aux data. */
808 array_var_set_destroy (struct var_set *vs)
810 struct array_var_set *avs = vs->aux;
812 hmapx_destroy (&avs->vars_by_name);
817 /* Returns a variable set based on the VAR_CNT variables in VAR.
   Each variable is indexed by name in a hash table.  If two
   variables in VAR have the same name (ignoring case), the
   partially built set is destroyed instead. */
819 var_set_create_from_array (struct variable *const *var, size_t var_cnt)
822 struct array_var_set *avs;
825 vs = xmalloc (sizeof *vs);
/* Install the array-backed implementations of the var_set
   operations. */
826 vs->get_cnt = array_var_set_get_cnt;
827 vs->get_var = array_var_set_get_var;
828 vs->lookup_var_idx = array_var_set_lookup_var_idx;
829 vs->destroy = array_var_set_destroy;
830 vs->aux = avs = xmalloc (sizeof *avs);
832 avs->var_cnt = var_cnt;
833 hmapx_init (&avs->vars_by_name);
834 for (i = 0; i < var_cnt; i++)
836 const char *name = var_get_name (var[i]);
/* A name that is already in the table means VAR contains a
   duplicate. */
839 if (array_var_set_lookup_var_idx (vs, name, &idx))
841 var_set_destroy (vs);
/* Store a pointer to the array element, not the variable itself,
   so that a lookup can recover the index. */
844 hmapx_insert (&avs->vars_by_name, CONST_CAST (void *, &avs->var[i]),
845 hash_case_string (name, 0));