1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
22 #include <language/lexer/variable-parser.h>
29 #include <data/dictionary.h>
30 #include <data/procedure.h>
31 #include <data/variable.h>
32 #include <libpspp/alloc.h>
33 #include <libpspp/bit-vector.h>
34 #include <libpspp/hash.h>
35 #include <libpspp/message.h>
36 #include <libpspp/misc.h>
37 #include <libpspp/pool.h>
38 #include <libpspp/str.h>
41 #define _(msgid) gettext (msgid)
43 /* Parses a name as a variable within VS. Sets *IDX to the
44 variable's index and returns true if successful. On failure
45 emits an error message and returns false. */
47 parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs,
52 if (lex_token (lexer) != T_ID)
54 lex_error (lexer, _("expecting variable name"));
57 else if (var_set_lookup_var_idx (vs, lex_tokid (lexer), idx))
64 msg (SE, _("%s is not a variable name."), lex_tokid (lexer));
69 /* Parses a name as a variable within VS and returns the variable
70 if successful. On failure emits an error message and returns a null pointer. */
72 static struct variable *
73 parse_vs_variable (struct lexer *lexer, const struct var_set *vs)
76 return parse_vs_variable_idx (lexer, vs, &idx) ? var_set_get_var (vs, idx) : NULL;
79 /* Parses a variable name in dictionary D and returns the
80 variable if successful. On failure emits an error message and
81 returns a null pointer. */
83 parse_variable (struct lexer *lexer, const struct dictionary *d)
85 struct var_set *vs = var_set_create_from_dict (d);
86 struct variable *var = parse_vs_variable (lexer, vs);
91 /* Parses a set of variables from dictionary D given options
92 OPTS. Resulting list of variables stored in *VAR and the
93 number of variables into *CNT. Returns true only if successful. */
96 parse_variables (struct lexer *lexer, const struct dictionary *d,
97 struct variable ***var,
98 size_t *cnt, int opts)
104 assert (var != NULL);
105 assert (cnt != NULL);
107 vs = var_set_create_from_dict (d);
108 success = parse_var_set_vars (lexer, vs, var, cnt, opts);
111 var_set_destroy (vs);
115 /* Parses a set of variables from dictionary D given options
116 OPTS. Resulting list of variables stored in *VARS and the
117 number of variables into *VAR_CNT. Returns true only if
118 successful. Same behavior as parse_variables, except that all
119 allocations are taken from the given POOL. */
121 parse_variables_pool (struct lexer *lexer, struct pool *pool,
122 const struct dictionary *dict,
123 struct variable ***vars, size_t *var_cnt, int opts)
127 /* PV_APPEND is unsafe because parse_variables would free the
128 existing names on failure, but those names are presumably
129 already in the pool, which would attempt to re-free them later. */
131 assert (!(opts & PV_APPEND));
133 retval = parse_variables (lexer, dict, vars, var_cnt, opts);
135 pool_register (pool, free, *vars);
139 /* Parses a variable name from VS. If successful, sets *IDX to
140 the variable's index in VS, *CLASS to the variable's
141 dictionary class, and returns true. Returns false on failure. */
144 parse_var_idx_class (struct lexer *lexer, const struct var_set *vs,
146 enum dict_class *class)
148 if (!parse_vs_variable_idx (lexer, vs, idx))
151 *class = dict_class_from_id (var_set_get_var (vs, *idx)->name);
155 /* Add the variable from VS with index IDX to the list of
156 variables V that has *NV elements and room for *MV.
157 Uses and updates INCLUDED to avoid duplicates if indicated by
158 PV_OPTS, which also affects what variables are allowed in appropriate ways. */
161 add_variable (struct variable ***v, size_t *nv, size_t *mv,
162 char *included, int pv_opts,
163 const struct var_set *vs, size_t idx)
165 struct variable *add = var_set_get_var (vs, idx);
167 if ((pv_opts & PV_NUMERIC) && add->type != NUMERIC)
168 msg (SW, _("%s is not a numeric variable. It will not be "
169 "included in the variable list."), add->name);
170 else if ((pv_opts & PV_STRING) && add->type != ALPHA)
171 msg (SE, _("%s is not a string variable. It will not be "
172 "included in the variable list."), add->name);
173 else if ((pv_opts & PV_NO_SCRATCH)
174 && dict_class_from_id (add->name) == DC_SCRATCH)
175 msg (SE, _("Scratch variables (such as %s) are not allowed "
176 "here."), add->name);
177 else if ((pv_opts & PV_SAME_TYPE) && *nv && add->type != (*v)[0]->type)
178 msg (SE, _("%s and %s are not the same type. All variables in "
179 "this variable list must be of the same type. %s "
180 "will be omitted from list."),
181 (*v)[0]->name, add->name, add->name);
182 else if ((pv_opts & PV_NO_DUPLICATE) && included[idx])
183 msg (SE, _("Variable %s appears twice in variable list."), add->name);
184 else if ((pv_opts & PV_DUPLICATE) || !included[idx])
189 *v = xnrealloc (*v, *mv, sizeof **v);
192 if (included != NULL)
197 /* Adds the variables in VS with indexes FIRST_IDX through
198 LAST_IDX, inclusive, to the list of variables V that has *NV
199 elements and room for *MV. Uses and updates INCLUDED to avoid
200 duplicates if indicated by PV_OPTS, which also affects what
201 variables are allowed in appropriate ways. */
203 add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included,
205 const struct var_set *vs, int first_idx, int last_idx,
206 enum dict_class class)
210 for (i = first_idx; i <= last_idx; i++)
211 if (dict_class_from_id (var_set_get_var (vs, i)->name) == class)
212 add_variable (v, nv, mv, included, pv_opts, vs, i);
215 /* Note that if parse_variables() returns false, *v is free()'d.
216 Conversely, if parse_variables() returns true, then *nv is
217 nonzero and *v is non-NULL. */
219 parse_var_set_vars (struct lexer *lexer, const struct var_set *vs,
220 struct variable ***v, size_t *nv,
230 /* At most one of PV_NUMERIC, PV_STRING, PV_SAME_TYPE may be specified. */
232 assert ((((pv_opts & PV_NUMERIC) != 0)
233 + ((pv_opts & PV_STRING) != 0)
234 + ((pv_opts & PV_SAME_TYPE) != 0)) <= 1);
236 /* PV_DUPLICATE and PV_NO_DUPLICATE are incompatible. */
237 assert (!(pv_opts & PV_DUPLICATE) || !(pv_opts & PV_NO_DUPLICATE));
239 if (!(pv_opts & PV_APPEND))
248 if (!(pv_opts & PV_DUPLICATE))
252 included = xcalloc (var_set_get_cnt (vs), sizeof *included);
253 for (i = 0; i < *nv; i++)
254 included[(*v)[i]->index] = 1;
261 if (lex_match (lexer, T_ALL))
262 add_variables (v, nv, &mv, included, pv_opts,
263 vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY);
266 enum dict_class class;
269 if (!parse_var_idx_class (lexer, vs, &first_idx, &class))
272 if (!lex_match (lexer, T_TO))
273 add_variable (v, nv, &mv, included, pv_opts, vs, first_idx);
277 enum dict_class last_class;
278 struct variable *first_var, *last_var;
280 if (!parse_var_idx_class (lexer, vs, &last_idx, &last_class))
283 first_var = var_set_get_var (vs, first_idx);
284 last_var = var_set_get_var (vs, last_idx);
286 if (last_idx < first_idx)
288 msg (SE, _("%s TO %s is not valid syntax since %s "
289 "precedes %s in the dictionary."),
290 first_var->name, last_var->name,
291 first_var->name, last_var->name);
295 if (class != last_class)
297 msg (SE, _("When using the TO keyword to specify several "
298 "variables, both variables must be from "
299 "the same variable dictionaries, of either "
300 "ordinary, scratch, or system variables. "
301 "%s is a %s variable, whereas %s is %s."),
302 first_var->name, dict_class_to_name (class),
303 last_var->name, dict_class_to_name (last_class));
307 add_variables (v, nv, &mv, included, pv_opts,
308 vs, first_idx, last_idx, class);
312 if (pv_opts & PV_SINGLE)
314 lex_match (lexer, ',');
316 while (lex_token (lexer) == T_ALL
317 || (lex_token (lexer) == T_ID && var_set_lookup_var (vs, lex_tokid (lexer)) != NULL));
333 /* Splits variable name S at its trailing numeric suffix, copying the root that precedes it
334 into string R. Sets *N to the suffix's value and *D to the suffix's length in digits. */
337 extract_num (char *s, char *r, int *n, int *d)
341 /* Find first digit. */
342 cp = s + strlen (s) - 1;
343 while (isdigit ((unsigned char) *cp) && cp > s)
348 strncpy (r, s, cp - s);
351 /* Count initial zeros. */
360 while (isdigit ((unsigned char) *cp))
363 *n = (*n * 10) + (*cp - '0');
368 if (*n == 0 && *d == 0)
370 msg (SE, _("incorrect use of TO convention"));
376 /* Parses a list of variable names according to the DATA LIST version
377 of the TO convention. */
379 parse_DATA_LIST_vars (struct lexer *lexer, char ***names, size_t *nnames, int pv_opts)
385 char name1[LONG_NAME_LEN + 1], name2[LONG_NAME_LEN + 1];
386 char root1[LONG_NAME_LEN + 1], root2[LONG_NAME_LEN + 1];
389 assert (names != NULL);
390 assert (nnames != NULL);
391 assert ((pv_opts & ~(PV_APPEND | PV_SINGLE
392 | PV_NO_SCRATCH | PV_NO_DUPLICATE)) == 0);
393 /* FIXME: PV_NO_DUPLICATE is not implemented. */
395 if (pv_opts & PV_APPEND)
396 nvar = mvar = *nnames;
405 if (lex_token (lexer) != T_ID)
407 lex_error (lexer, "expecting variable name");
410 if (dict_class_from_id (lex_tokid (lexer)) == DC_SCRATCH
411 && (pv_opts & PV_NO_SCRATCH))
413 msg (SE, _("Scratch variables not allowed here."));
416 strcpy (name1, lex_tokid (lexer));
418 if (lex_token (lexer) == T_TO)
421 if (lex_token (lexer) != T_ID)
423 lex_error (lexer, "expecting variable name");
426 strcpy (name2, lex_tokid (lexer));
429 if (!extract_num (name1, root1, &n1, &d1)
430 || !extract_num (name2, root2, &n2, &d2))
433 if (strcasecmp (root1, root2))
435 msg (SE, _("Prefixes don't match in use of TO convention."));
440 msg (SE, _("Bad bounds in use of TO convention."));
446 if (mvar < nvar + (n2 - n1 + 1))
448 mvar += ROUND_UP (n2 - n1 + 1, 16);
449 *names = xnrealloc (*names, mvar, sizeof **names);
452 for (n = n1; n <= n2; n++)
454 char name[LONG_NAME_LEN + 1];
455 sprintf (name, "%s%0*d", root1, d1, n);
456 (*names)[nvar] = xstrdup (name);
465 *names = xnrealloc (*names, mvar, sizeof **names);
467 (*names)[nvar++] = xstrdup (name1);
470 lex_match (lexer, ',');
472 if (pv_opts & PV_SINGLE)
475 while (lex_token (lexer) == T_ID);
483 for (i = 0; i < nvar; i++)
492 /* Registers each of the NAMES[0...NNAMES - 1] in POOL, as well as NAMES itself. */
495 register_vars_pool (struct pool *pool, char **names, size_t nnames)
499 for (i = 0; i < nnames; i++)
500 pool_register (pool, free, names[i]);
501 pool_register (pool, free, names);
504 /* Parses a list of variable names according to the DATA LIST
505 version of the TO convention. Same args as
506 parse_DATA_LIST_vars(), except that all allocations are taken
507 from the given POOL. */
509 parse_DATA_LIST_vars_pool (struct lexer *lexer, struct pool *pool,
510 char ***names, size_t *nnames, int pv_opts)
514 /* PV_APPEND is unsafe because parse_DATA_LIST_vars would free
515 the existing names on failure, but those names are
516 presumably already in the pool, which would attempt to re-free them later. */
518 assert (!(pv_opts & PV_APPEND));
520 retval = parse_DATA_LIST_vars (lexer, names, nnames, pv_opts);
522 register_vars_pool (pool, *names, *nnames);
526 /* Parses a list of variables where some of the variables may be
527 existing and the rest are to be created. Same args as
528 parse_DATA_LIST_vars(). */
530 parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict,
531 char ***names, size_t *nnames, int pv_opts)
535 assert (names != NULL);
536 assert (nnames != NULL);
537 assert ((pv_opts & ~PV_APPEND) == 0);
539 if (!(pv_opts & PV_APPEND))
544 while (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL)
546 if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
551 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
553 *names = xnrealloc (*names, *nnames + nv, sizeof **names);
554 for (i = 0; i < nv; i++)
555 (*names)[*nnames + i] = xstrdup (v[i]->name);
559 else if (!parse_DATA_LIST_vars (lexer, names, nnames, PV_APPEND))
565 for (i = 0; i < *nnames; i++)
573 /* Parses a list of variables where some of the variables may be
574 existing and the rest are to be created. Same args as
575 parse_mixed_vars(), except that all allocations are taken
576 from the given POOL. */
578 parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struct pool *pool,
579 char ***names, size_t *nnames, int pv_opts)
583 /* PV_APPEND is unsafe because parse_mixed_vars would free
584 the existing names on failure, but those names are
585 presumably already in the pool, which would attempt to re-free them later. */
587 assert (!(pv_opts & PV_APPEND));
589 retval = parse_mixed_vars (lexer, dict, names, nnames, pv_opts);
591 register_vars_pool (pool, *names, *nnames);
595 /* A set of variables. */
598 size_t (*get_cnt) (const struct var_set *);
599 struct variable *(*get_var) (const struct var_set *, size_t idx);
600 bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *);
601 void (*destroy) (struct var_set *);
605 /* Returns the number of variables in VS. */
607 var_set_get_cnt (const struct var_set *vs)
611 return vs->get_cnt (vs);
614 /* Return variable with index IDX in VS.
615 IDX must be less than the number of variables in VS. */
617 var_set_get_var (const struct var_set *vs, size_t idx)
620 assert (idx < var_set_get_cnt (vs));
622 return vs->get_var (vs, idx);
625 /* Returns the variable in VS named NAME, or a null pointer if VS
626 contains no variable with that name. */
628 var_set_lookup_var (const struct var_set *vs, const char *name)
631 return (var_set_lookup_var_idx (vs, name, &idx)
632 ? var_set_get_var (vs, idx)
636 /* If VS contains a variable named NAME, sets *IDX to its index
637 and returns true. Otherwise, returns false. */
639 var_set_lookup_var_idx (const struct var_set *vs, const char *name,
643 assert (name != NULL);
644 assert (strlen (name) <= LONG_NAME_LEN);
646 return vs->lookup_var_idx (vs, name, idx);
651 var_set_destroy (struct var_set *vs)
657 /* Returns the number of variables in VS. */
659 dict_var_set_get_cnt (const struct var_set *vs)
661 struct dictionary *d = vs->aux;
663 return dict_get_var_cnt (d);
666 /* Return variable with index IDX in VS.
667 IDX must be less than the number of variables in VS. */
668 static struct variable *
669 dict_var_set_get_var (const struct var_set *vs, size_t idx)
671 struct dictionary *d = vs->aux;
673 return dict_get_var (d, idx);
676 /* If VS contains a variable named NAME, sets *IDX to its index
677 and returns true. Otherwise, returns false. */
679 dict_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
682 struct dictionary *d = vs->aux;
683 struct variable *v = dict_lookup_var (d, name);
695 dict_var_set_destroy (struct var_set *vs)
700 /* Returns a variable set based on D. */
702 var_set_create_from_dict (const struct dictionary *d)
704 struct var_set *vs = xmalloc (sizeof *vs);
705 vs->get_cnt = dict_var_set_get_cnt;
706 vs->get_var = dict_var_set_get_var;
707 vs->lookup_var_idx = dict_var_set_lookup_var_idx;
708 vs->destroy = dict_var_set_destroy;
709 vs->aux = (void *) d;
713 /* A variable set based on an array. */
716 struct variable *const *var;/* Array of variables. */
717 size_t var_cnt; /* Number of elements in var. */
718 struct hsh_table *name_tab; /* Hash from variable names to variables. */
721 /* Returns the number of variables in VS. */
723 array_var_set_get_cnt (const struct var_set *vs)
725 struct array_var_set *avs = vs->aux;
730 /* Return variable with index IDX in VS.
731 IDX must be less than the number of variables in VS. */
732 static struct variable *
733 array_var_set_get_var (const struct var_set *vs, size_t idx)
735 struct array_var_set *avs = vs->aux;
737 return (struct variable *) avs->var[idx];
740 /* If VS contains a variable named NAME, sets *IDX to its index
741 and returns true. Otherwise, returns false. */
743 array_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
746 struct array_var_set *avs = vs->aux;
747 struct variable v, *vp, *const *vpp;
749 strcpy (v.name, name);
751 vpp = hsh_find (avs->name_tab, &vp);
754 *idx = vpp - avs->var;
763 array_var_set_destroy (struct var_set *vs)
765 struct array_var_set *avs = vs->aux;
767 hsh_destroy (avs->name_tab);
772 /* Returns a variable set based on the VAR_CNT variables in VAR. */
775 var_set_create_from_array (struct variable *const *var, size_t var_cnt)
778 struct array_var_set *avs;
781 vs = xmalloc (sizeof *vs);
782 vs->get_cnt = array_var_set_get_cnt;
783 vs->get_var = array_var_set_get_var;
784 vs->lookup_var_idx = array_var_set_lookup_var_idx;
785 vs->destroy = array_var_set_destroy;
786 vs->aux = avs = xmalloc (sizeof *avs);
788 avs->var_cnt = var_cnt;
789 avs->name_tab = hsh_create (2 * var_cnt,
790 compare_var_ptr_names, hash_var_ptr_name, NULL,
792 for (i = 0; i < var_cnt; i++)
793 if (hsh_insert (avs->name_tab, (void *) &var[i]) != NULL)
795 var_set_destroy (vs);