1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <language/lexer/variable-parser.h>
26 #include <data/dictionary.h>
27 #include <data/procedure.h>
28 #include <data/variable.h>
29 #include <libpspp/assertion.h>
30 #include <libpspp/bit-vector.h>
31 #include <libpspp/hash.h>
32 #include <libpspp/message.h>
33 #include <libpspp/misc.h>
34 #include <libpspp/pool.h>
35 #include <libpspp/str.h>
/* Shorthand that routes user-visible message strings through gettext
   for translation. */
40 #define _(msgid) gettext (msgid)
/* Forward declarations of var_set accessors that the parsing functions
   below use before their definitions appear later in this file. */
42 static struct variable * var_set_get_var (const struct var_set *, size_t );
44 static struct variable *var_set_lookup_var (const struct var_set *,
47 static bool var_set_lookup_var_idx (const struct var_set *, const char *,
52 /* Parses the current token as the name of a variable in VS.

   LEXER supplies the token, VS is the variable set that is searched,
   and IDX receives the index of the variable within VS.

   Returns true and sets *IDX if the token is an identifier that names
   a variable in VS.  Otherwise emits an error message and returns
   false. */
56 parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs,
/* Anything other than an identifier cannot be a variable name. */
61 if (lex_token (lexer) != T_ID)
63 lex_error (lexer, _("expecting variable name"));
/* The lookup stores the matching index in *IDX when it succeeds. */
66 else if (var_set_lookup_var_idx (vs, lex_tokid (lexer), idx))
/* An identifier that VS does not contain is reported by name. */
73 msg (SE, _("%s is not a variable name."), lex_tokid (lexer));
78 /* Parses a name as a variable within VS and returns the variable
79 if successful. On failure emits an error message and returns a null pointer. */
81 static struct variable *
82 parse_vs_variable (struct lexer *lexer, const struct var_set *vs)
/* Resolve the name to an index first, then map the index back to the
   variable; a failed parse yields NULL. */
85 return parse_vs_variable_idx (lexer, vs, &idx) ? var_set_get_var (vs, idx) : NULL;
88 /* Parses the current token as the name of a variable in dictionary D.

   Returns the variable if successful.  On failure emits an error
   message and returns a null pointer. */
92 parse_variable (struct lexer *lexer, const struct dictionary *d)
/* Wrap D in a variable set so that the generic var_set parser can be
   reused for a plain dictionary. */
94 struct var_set *vs = var_set_create_from_dict (d);
95 struct variable *var = parse_vs_variable (lexer, vs);
100 /* Parses a set of variables from dictionary D given options
101 OPTS. Resulting list of variables stored in *VAR and the
102 number of variables into *CNT. Returns true only if successful. */
105 parse_variables (struct lexer *lexer, const struct dictionary *d,
106 struct variable ***var,
107 size_t *cnt, int opts)
/* Both output locations are mandatory. */
113 assert (var != NULL);
114 assert (cnt != NULL);
/* Wrap D in a temporary variable set, let parse_var_set_vars do the
   real work, then dispose of the wrapper again. */
116 vs = var_set_create_from_dict (d);
117 success = parse_var_set_vars (lexer, vs, var, cnt, opts);
118 var_set_destroy (vs);
122 /* Pool-aware variant of parse_variables.

   Parses a set of variables from dictionary DICT given options OPTS.
   The resulting list of variables is stored in *VARS and the number
   of variables in *VAR_CNT.  The parsing is delegated to
   parse_variables; the array it produces is then registered in POOL
   together with free, so that POOL takes care of releasing it.

   OPTS must not include PV_APPEND (asserted below).

   Returns true only if successful. */
128 parse_variables_pool (struct lexer *lexer, struct pool *pool,
129 const struct dictionary *dict,
130 struct variable ***vars, size_t *var_cnt, int opts)
134 /* PV_APPEND is unsafe because parse_variables would free the
135 existing names on failure, but those names are presumably
136 already in the pool, which would attempt to re-free it
138 assert (!(opts & PV_APPEND));
140 retval = parse_variables (lexer, dict, vars, var_cnt, opts);
142 pool_register (pool, free, *vars);
146 /* Parses a variable name from VS. If successful, sets *IDX to
147 the variable's index in VS, *CLASS to the variable's
148 dictionary class, and returns true. Returns false on failure. */
151 parse_var_idx_class (struct lexer *lexer, const struct var_set *vs,
153 enum dict_class *class)
/* parse_vs_variable_idx reports its own errors, so a failure here
   needs no further message. */
155 if (!parse_vs_variable_idx (lexer, vs, idx))
/* The dictionary class is derived from the name of the variable that
   was just parsed. */
158 *class = dict_class_from_id (var_get_name (var_set_get_var (vs, *idx)));
162 /* Add the variable from VS with index IDX to the list of
163 variables V that has *NV elements and room for *MV.
164 Uses and updates INCLUDED to avoid duplicates if indicated by
165 PV_OPTS, which also affects what variables are allowed in appropriate ways. */
168 add_variable (struct variable ***v, size_t *nv, size_t *mv,
169 char *included, int pv_opts,
170 const struct var_set *vs, size_t idx)
172 struct variable *add = var_set_get_var (vs, idx);
173 const char *add_name = var_get_name (add);
/* The chain below rejects the candidate for the first restriction in
   PV_OPTS that it violates; only the final branch adds it.

   Type restrictions come first.
   NOTE(review): this first rejection is reported with SW while every
   other rejection below uses SE -- confirm the difference is
   intended. */
175 if ((pv_opts & PV_NUMERIC) && !var_is_numeric (add))
176 msg (SW, _("%s is not a numeric variable. It will not be "
177 "included in the variable list."), add_name);
178 else if ((pv_opts & PV_STRING) && !var_is_alpha (add))
179 msg (SE, _("%s is not a string variable. It will not be "
180 "included in the variable list."), add_name);
/* Scratch variables are recognized by the class of their name. */
181 else if ((pv_opts & PV_NO_SCRATCH)
182 && dict_class_from_id (add_name) == DC_SCRATCH)
183 msg (SE, _("Scratch variables (such as %s) are not allowed "
/* The first variable already in the list, (*v)[0], fixes the type that
   later ones must match.  The *nv test skips the comparison while the
   list is still empty.  PV_SAME_WIDTH implies the type check too. */
185 else if ((pv_opts & (PV_SAME_TYPE | PV_SAME_WIDTH)) && *nv
186 && var_get_type (add) != var_get_type ((*v)[0]))
187 msg (SE, _("%s and %s are not the same type. All variables in "
188 "this variable list must be of the same type. %s "
189 "will be omitted from the list."),
190 var_get_name ((*v)[0]), add_name, add_name);
/* Same idea for widths, again measured against (*v)[0]. */
191 else if ((pv_opts & PV_SAME_WIDTH) && *nv
192 && var_get_width (add) != var_get_width ((*v)[0]))
193 msg (SE, _("%s and %s are string variables with different widths. "
194 "All variables in this variable list must have the "
195 "same width. %s will be omitted from the list."),
196 var_get_name ((*v)[0]), add_name, add_name);
/* Duplicate handling.  INCLUDED is read only when PV_DUPLICATE is
   unset: PV_NO_DUPLICATE excludes PV_DUPLICATE (asserted in
   parse_var_set_vars), and the last test short-circuits when
   PV_DUPLICATE is set. */
197 else if ((pv_opts & PV_NO_DUPLICATE) && included[idx])
198 msg (SE, _("Variable %s appears twice in variable list."), add_name);
199 else if ((pv_opts & PV_DUPLICATE) || !included[idx])
/* Resize the array to *mv elements. */
204 *v = xnrealloc (*v, *mv, sizeof **v);
/* INCLUDED may be null, in which case nothing is recorded. */
207 if (included != NULL)
212 /* Adds the variables in VS with indexes FIRST_IDX through
   LAST_IDX, inclusive, to the list of variables V that has *NV
   elements and room for *MV.  Only variables whose dictionary class
   (as derived from their names) equals CLASS are added; the others
   in the range are silently skipped.  Uses and updates INCLUDED to
   avoid duplicates if indicated by PV_OPTS, which also affects what
   variables are allowed in appropriate ways. */
218 add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included,
220 const struct var_set *vs, int first_idx, int last_idx,
221 enum dict_class class)
/* Each candidate goes through add_variable, which applies the PV_OPTS
   restrictions and reports any rejection itself. */
225 for (i = first_idx; i <= last_idx; i++)
226 if (dict_class_from_id (var_get_name (var_set_get_var (vs, i))) == class)
227 add_variable (v, nv, mv, included, pv_opts, vs, i);
230 /* Parses a list of variables from VS into *V, with the count in *NV,
   subject to the options in PV_OPTS.

   Each item in the list is ALL, a single variable name, or a
   FIRST TO LAST range.  Parsing continues while the next token is ALL
   or an identifier that names a variable in VS.

   Note that if parse_var_set_vars() (and therefore parse_variables())
   returns false, *v is free()'d.  Conversely, if it returns true,
   then *nv is nonzero and *v is non-NULL. */
234 parse_var_set_vars (struct lexer *lexer, const struct var_set *vs,
235 struct variable ***v, size_t *nv,
245 /* At most one of PV_NUMERIC, PV_STRING, PV_SAME_TYPE,
246 PV_SAME_WIDTH may be specified. */
247 assert (((pv_opts & PV_NUMERIC) != 0)
248 + ((pv_opts & PV_STRING) != 0)
249 + ((pv_opts & PV_SAME_TYPE) != 0)
250 + ((pv_opts & PV_SAME_WIDTH) != 0) <= 1);
252 /* PV_DUPLICATE and PV_NO_DUPLICATE are incompatible. */
253 assert (!(pv_opts & PV_DUPLICATE) || !(pv_opts & PV_NO_DUPLICATE));
/* PV_APPEND selects between extending the list the caller passed in
   and starting a new one. */
255 if (!(pv_opts & PV_APPEND))
/* Unless duplicates are allowed, INCLUDED gets one zero-initialized
   flag per variable in VS, and the variables already present in *V are
   looked up by name to find their indexes in VS. */
264 if (!(pv_opts & PV_DUPLICATE))
268 included = xcalloc (var_set_get_cnt (vs), sizeof *included);
269 for (i = 0; i < *nv; i++)
272 if (!var_set_lookup_var_idx (vs, var_get_name ((*v)[i]), &index))
/* ALL expands to every variable of ordinary class in VS. */
282 if (lex_match (lexer, T_ALL))
283 add_variables (v, nv, &mv, included, pv_opts,
284 vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY);
287 enum dict_class class;
/* Otherwise expect a variable name, optionally followed by TO and a
   second variable name. */
290 if (!parse_var_idx_class (lexer, vs, &first_idx, &class))
/* No TO: a single variable. */
293 if (!lex_match (lexer, T_TO))
294 add_variable (v, nv, &mv, included, pv_opts, vs, first_idx);
298 enum dict_class last_class;
299 struct variable *first_var, *last_var;
301 if (!parse_var_idx_class (lexer, vs, &last_idx, &last_class))
304 first_var = var_set_get_var (vs, first_idx);
305 last_var = var_set_get_var (vs, last_idx);
/* A TO range must run forward through VS.
   NOTE(review): this branch is taken when the last variable comes
   before the first one, yet the message arguments make it read as
   FIRST precedes LAST -- the final two arguments look swapped. */
307 if (last_idx < first_idx)
309 const char *first_name = var_get_name (first_var);
310 const char *last_name = var_get_name (last_var);
311 msg (SE, _("%s TO %s is not valid syntax since %s "
312 "precedes %s in the dictionary."),
313 first_name, last_name, first_name, last_name);
/* Both ends of a TO range must belong to the same dictionary class. */
317 if (class != last_class)
319 msg (SE, _("When using the TO keyword to specify several "
320 "variables, both variables must be from "
321 "the same variable dictionaries, of either "
322 "ordinary, scratch, or system variables. "
323 "%s is a %s variable, whereas %s is %s."),
324 var_get_name (first_var), dict_class_to_name (class),
325 var_get_name (last_var),
326 dict_class_to_name (last_class));
/* Add the whole range, restricted to variables of that class. */
330 add_variables (v, nv, &mv, included, pv_opts,
331 vs, first_idx, last_idx, class);
335 if (pv_opts & PV_SINGLE)
/* A comma between items is optional; the result of the match is not
   examined. */
337 lex_match (lexer, ',');
/* Keep going while the next token can start another item. */
339 while (lex_token (lexer) == T_ALL
340 || (lex_token (lexer) == T_ID && var_set_lookup_var (vs, lex_tokid (lexer)) != NULL));
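356 /* Splits variable name S at its trailing numeric suffix: the root that
357 precedes the digits is copied into string R, *N is set to the suffix's value, and *D to the suffix's length in digits. Emits an error and fails if S has no numeric suffix. */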
360 extract_num (char *s, char *r, int *n, int *d)
364 /* Find the first digit of the trailing digit run by scanning backward
   from the last character of S.
   NOTE(review): strlen (s) - 1 assumes that S is not empty. */
365 cp = s + strlen (s) - 1;
366 while (isdigit ((unsigned char) *cp) && cp > s)
/* Copy everything ahead of the digits into R.  strncpy adds no
   terminator when it copies exactly cp - s bytes, so R has to be
   terminated separately. */
371 strncpy (r, s, cp - s);
374 /* Count initial zeros. */
/* Accumulate the decimal value of the suffix.
   NOTE(review): *n is a plain int, so a long enough run of digits
   would overflow it. */
383 while (isdigit ((unsigned char) *cp))
386 *n = (*n * 10) + (*cp - '0');
/* Both *n and *d still being zero is reported as a misuse of the TO
   convention. */
391 if (*n == 0 && *d == 0)
393 msg (SE, _("incorrect use of TO convention"));
399 /* Parses a list of variable names according to the DATA LIST version
   of the TO convention: in NAME1 TO NAME2, both names must consist of
   the same root (compared without regard to case) followed by a
   number, and the pair expands to one name per number from the first
   suffix through the second, zero-padded to the width of the first.

   Each parsed name is stored in *NAMES as a newly allocated string.
   PV_OPTS may contain only PV_APPEND, PV_SINGLE, PV_NO_SCRATCH and
   PV_NO_DUPLICATE; with PV_APPEND the existing *NNAMES entries of
   *NAMES are kept and new names are added after them.

   Callers treat a zero result as failure. */
402 parse_DATA_LIST_vars (struct lexer *lexer, char ***names, size_t *nnames, int pv_opts)
408 char name1[VAR_NAME_LEN + 1], name2[VAR_NAME_LEN + 1];
409 char root1[VAR_NAME_LEN + 1], root2[VAR_NAME_LEN + 1];
412 assert (names != NULL);
413 assert (nnames != NULL);
414 assert ((pv_opts & ~(PV_APPEND | PV_SINGLE
415 | PV_NO_SCRATCH | PV_NO_DUPLICATE)) == 0);
416 /* FIXME: PV_NO_DUPLICATE is not implemented. */
/* When appending, both the used count and the capacity start at the
   number of names the caller already has. */
418 if (pv_opts & PV_APPEND)
419 nvar = mvar = *nnames;
/* NOTE(review): the message below is not wrapped in _(), unlike the
   identical message in parse_vs_variable_idx, so it is not passed
   through gettext. */
428 if (lex_token (lexer) != T_ID)
430 lex_error (lexer, "expecting variable name");
433 if (dict_class_from_id (lex_tokid (lexer)) == DC_SCRATCH
434 && (pv_opts & PV_NO_SCRATCH))
436 msg (SE, _("Scratch variables not allowed here."));
/* NOTE(review): the unbounded copy relies on identifiers from the lexer
   fitting in VAR_NAME_LEN bytes -- confirm. */
439 strcpy (name1, lex_tokid (lexer));
/* NAME1 TO NAME2 form. */
441 if (lex_token (lexer) == T_TO)
/* NOTE(review): same untranslated message as above. */
444 if (lex_token (lexer) != T_ID)
446 lex_error (lexer, "expecting variable name");
449 strcpy (name2, lex_tokid (lexer));
/* Split both names into root and numeric suffix; extract_num reports
   its own error when a name has no usable suffix. */
452 if (!extract_num (name1, root1, &n1, &d1)
453 || !extract_num (name2, root2, &n2, &d2))
/* The roots must match, ignoring case. */
456 if (strcasecmp (root1, root2))
458 msg (SE, _("Prefixes don't match in use of TO convention."));
463 msg (SE, _("Bad bounds in use of TO convention."));
/* Make room for the n2 - n1 + 1 names of the range, growing the
   capacity in multiples of 16. */
469 if (mvar < nvar + (n2 - n1 + 1))
471 mvar += ROUND_UP (n2 - n1 + 1, 16);
472 *names = xnrealloc (*names, mvar, sizeof **names);
/* Generate each name as the root followed by the number, zero-padded
   to at least d1 digits. */
475 for (n = n1; n <= n2; n++)
477 char name[VAR_NAME_LEN + 1];
478 sprintf (name, "%s%0*d", root1, d1, n);
479 (*names)[nvar] = xstrdup (name);
/* Plain single name: store a copy of it. */
488 *names = xnrealloc (*names, mvar, sizeof **names);
490 (*names)[nvar++] = xstrdup (name1);
/* A comma between items is optional. */
493 lex_match (lexer, ',');
495 if (pv_opts & PV_SINGLE)
/* Continue for as long as another identifier follows. */
498 while (lex_token (lexer) == T_ID);
506 for (i = 0; i < nvar; i++)
515 /* Registers each of the NAMES[0...NNAMES - 1] in POOL, as well as NAMES itself. */
518 register_vars_pool (struct pool *pool, char **names, size_t nnames)
/* Every individual string is registered with free as its release
   function ... */
522 for (i = 0; i < nnames; i++)
523 pool_register (pool, free, names[i]);
/* ... and so is the array that holds them. */
524 pool_register (pool, free, names);
527 /* Pool-aware variant of parse_DATA_LIST_vars.  Takes the same
   arguments plus POOL.  The parsing itself is delegated to
   parse_DATA_LIST_vars; register_vars_pool then registers the
   resulting strings, and the array that holds them, in POOL.

   PV_OPTS must not include PV_APPEND (asserted below). */
532 parse_DATA_LIST_vars_pool (struct lexer *lexer, struct pool *pool,
533 char ***names, size_t *nnames, int pv_opts)
537 /* PV_APPEND is unsafe because parse_DATA_LIST_vars would free
538 the existing names on failure, but those names are
539 presumably already in the pool, which would attempt to
541 assert (!(pv_opts & PV_APPEND));
543 retval = parse_DATA_LIST_vars (lexer, names, nnames, pv_opts);
545 register_vars_pool (pool, *names, *nnames);
549 /* Parses a list of variables in which some names may refer to
   existing variables in DICT while the rest are new names to be
   created.

   Items that start with ALL or with the name of an existing variable
   are parsed with parse_variables and the names of the resulting
   variables are copied; anything else is parsed with
   parse_DATA_LIST_vars.  Names are stored in *NAMES as newly
   allocated strings, with *NNAMES as the number of entries.

   PV_OPTS may contain only PV_APPEND. */
553 parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict,
554 char ***names, size_t *nnames, int pv_opts)
558 assert (names != NULL);
559 assert (nnames != NULL);
560 assert ((pv_opts & ~PV_APPEND) == 0);
562 if (!(pv_opts & PV_APPEND))
567 while (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL)
/* Existing variables (or ALL) go through the dictionary parser. */
569 if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
574 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
/* Grow *NAMES by NV slots and fill them with copies of the names of
   the variables that were just parsed. */
576 *names = xnrealloc (*names, *nnames + nv, sizeof **names);
577 for (i = 0; i < nv; i++)
578 (*names)[*nnames + i] = xstrdup (var_get_name (v[i]));
/* Unknown names are new variables; PV_APPEND makes the DATA LIST
   parser add them after what has been collected so far. */
582 else if (!parse_DATA_LIST_vars (lexer, names, nnames, PV_APPEND))
588 for (i = 0; i < *nnames; i++)
596 /* Pool-aware variant of parse_mixed_vars.  Takes the same arguments
   plus POOL.  The parsing itself is delegated to parse_mixed_vars;
   register_vars_pool then registers the resulting strings, and the
   array that holds them, in POOL.

   PV_OPTS must not include PV_APPEND (asserted below).

   NOTE(review): the comment inside the body names
   parse_mixed_vars_pool as the function that would free the existing
   names, but the function actually called here is parse_mixed_vars. */
601 parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struct pool *pool,
602 char ***names, size_t *nnames, int pv_opts)
606 /* PV_APPEND is unsafe because parse_mixed_vars_pool would free
607 the existing names on failure, but those names are
608 presumably already in the pool, which would attempt to
610 assert (!(pv_opts & PV_APPEND));
612 retval = parse_mixed_vars (lexer, dict, names, nnames, pv_opts);
614 register_vars_pool (pool, *names, *nnames);
618 /* A set of variables, accessed through a table of callbacks so that
   different backing stores can share the parsing code in this file. */
/* Returns the number of variables in the set. */
621 size_t (*get_cnt) (const struct var_set *);
/* Returns the variable at index IDX. */
622 struct variable *(*get_var) (const struct var_set *, size_t idx);
/* Looks up a variable by name; on a match stores its index through the
   last argument and returns true. */
623 bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *);
/* Releases the set. */
624 void (*destroy) (struct var_set *);
628 /* Returns the number of variables in VS, as reported by the get_cnt
   callback of VS. */
630 var_set_get_cnt (const struct var_set *vs)
634 return vs->get_cnt (vs);
637 /* Returns the variable with index IDX in VS, obtained through the
   get_var callback of VS.
   IDX must be less than the number of variables in VS (asserted). */
639 static struct variable *
640 var_set_get_var (const struct var_set *vs, size_t idx)
643 assert (idx < var_set_get_cnt (vs));
645 return vs->get_var (vs, idx);
648 /* Returns the variable in VS named NAME, or a null pointer if VS
   contains no variable with that name. */
651 var_set_lookup_var (const struct var_set *vs, const char *name)
/* Find the index by name, then fetch the variable at that index. */
654 return (var_set_lookup_var_idx (vs, name, &idx)
655 ? var_set_get_var (vs, idx)
659 /* If VS contains a variable named NAME, sets *IDX to its index
   and returns true.  Otherwise, returns false.  The search itself is
   performed by the lookup_var_idx callback of VS. */
662 var_set_lookup_var_idx (const struct var_set *vs, const char *name,
/* NAME must be present and no longer than a variable name can be. */
666 assert (name != NULL);
667 assert (strlen (name) <= VAR_NAME_LEN);
669 return vs->lookup_var_idx (vs, name, idx);
/* Disposes of VS once a caller has finished with it.
   NOTE(review): presumably forwards to the destroy callback stored in
   VS -- confirm against the full definition. */
674 var_set_destroy (struct var_set *vs)
680 /* Returns the number of variables in VS, which for a
   dictionary-based set is the variable count of the dictionary. */
682 dict_var_set_get_cnt (const struct var_set *vs)
/* For this kind of set, aux holds the dictionary itself. */
684 struct dictionary *d = vs->aux;
686 return dict_get_var_cnt (d);
689 /* Returns the variable with index IDX in VS, taken straight from the
   underlying dictionary.
   IDX must be less than the number of variables in VS. */
691 static struct variable *
692 dict_var_set_get_var (const struct var_set *vs, size_t idx)
694 struct dictionary *d = vs->aux;
696 return dict_get_var (d, idx);
699 /* If VS contains a variable named NAME, sets *IDX to its index
   and returns true.  Otherwise, returns false. */
702 dict_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
705 struct dictionary *d = vs->aux;
706 struct variable *v = dict_lookup_var (d, name);
/* The index within the set is the dictionary index of the variable. */
709 *idx = var_get_dict_index (v);
/* Destroy callback for dictionary-based variable sets; it is the
   function that var_set_create_from_dict installs as destroy.
   NOTE(review): the body is not shown here -- confirm what it
   releases. */
718 dict_var_set_destroy (struct var_set *vs)
723 /* Returns a newly allocated variable set whose callbacks operate
   directly on dictionary D. */
725 var_set_create_from_dict (const struct dictionary *d)
727 struct var_set *vs = xmalloc (sizeof *vs);
/* Install the dictionary-backed implementation of each operation. */
728 vs->get_cnt = dict_var_set_get_cnt;
729 vs->get_var = dict_var_set_get_var;
730 vs->lookup_var_idx = dict_var_set_lookup_var_idx;
731 vs->destroy = dict_var_set_destroy;
/* D itself is the backing store.  The cast drops const because aux
   serves as a generic pointer shared by all kinds of variable set. */
732 vs->aux = (void *) d;
736 /* Backing data for a variable set based on an array. */
739 struct variable *const *var;/* Array of variables; indexes in the set are positions in this array. */
740 size_t var_cnt; /* Number of elements in var. */
741 struct hsh_table *name_tab; /* Hash keyed by variable name; its entries point at elements of var. */
744 /* Returns the number of variables in the array-based set VS. */
746 array_var_set_get_cnt (const struct var_set *vs)
/* For this kind of set, aux holds the struct array_var_set. */
748 struct array_var_set *avs = vs->aux;
753 /* Returns the variable with index IDX in VS, which is simply element
   IDX of the underlying array.
   IDX must be less than the number of variables in VS. */
755 static struct variable *
756 array_var_set_get_var (const struct var_set *vs, size_t idx)
758 struct array_var_set *avs = vs->aux;
/* The cast only adjusts the type of the element that is returned. */
760 return (struct variable *) avs->var[idx];
763 /* If VS contains a variable named NAME, sets *IDX to its index
   and returns true.  Otherwise, returns false. */
766 array_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
769 struct array_var_set *avs = vs->aux;
770 struct variable *v, *const *vpp;
/* Build a temporary variable called NAME to serve as the search key,
   and pass its address because the table holds pointers to variable
   pointers. */
772 v = var_create (name, 0);
773 vpp = hsh_find (avs->name_tab, &v);
/* Table entries point into the var array, so the distance from the
   start of that array is the index. */
778 *idx = vpp - avs->var;
/* Destroy callback for array-based variable sets.  Destroys the name
   hash table that belongs to VS. */
787 array_var_set_destroy (struct var_set *vs)
789 struct array_var_set *avs = vs->aux;
791 hsh_destroy (avs->name_tab);
796 /* Returns a variable set based on the VAR_CNT variables in
799 var_set_create_from_array (struct variable *const *var, size_t var_cnt)
802 struct array_var_set *avs;
805 vs = xmalloc (sizeof *vs);
806 vs->get_cnt = array_var_set_get_cnt;
807 vs->get_var = array_var_set_get_var;
808 vs->lookup_var_idx = array_var_set_lookup_var_idx;
809 vs->destroy = array_var_set_destroy;
810 vs->aux = avs = xmalloc (sizeof *avs);
812 avs->var_cnt = var_cnt;
813 avs->name_tab = hsh_create (2 * var_cnt,
814 compare_var_ptrs_by_name, hash_var_ptr_by_name,
816 for (i = 0; i < var_cnt; i++)
817 if (hsh_insert (avs->name_tab, (void *) &var[i]) != NULL)
819 var_set_destroy (vs);