1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include <language/lexer/variable-parser.h>
26 #include <data/dictionary.h>
27 #include <data/procedure.h>
28 #include <data/variable.h>
29 #include <libpspp/alloc.h>
30 #include <libpspp/assertion.h>
31 #include <libpspp/bit-vector.h>
32 #include <libpspp/hash.h>
33 #include <libpspp/message.h>
34 #include <libpspp/misc.h>
35 #include <libpspp/pool.h>
36 #include <libpspp/str.h>
/* Translation shorthand: expands to a call to gettext() on the
   message id. */
39 #define _(msgid) gettext (msgid)
/* Prototypes for var_set helpers that the parsing routines below
   call before the helpers are defined later in this file. */
41 static struct variable * var_set_get_var (const struct var_set *, size_t );
43 static struct variable *var_set_lookup_var (const struct var_set *,
46 static bool var_set_lookup_var_idx (const struct var_set *, const char *,
51 /* Parses a name as a variable within VS. Sets *IDX to the
52 variable's index and returns true if successful. On failure
53 emits an error message and returns false. */
/* Two diagnostics are issued here: "expecting variable name" when
   the current token is not an identifier, and "%s is not a
   variable name." for an identifier token. */
55 parse_vs_variable_idx (struct lexer *lexer, const struct var_set *vs,
60 if (lex_token (lexer) != T_ID)
62 lex_error (lexer, _("expecting variable name"));
/* The lookup is handed IDX directly, so it is the lookup that
   stores the index. */
65 else if (var_set_lookup_var_idx (vs, lex_tokid (lexer), idx))
72 msg (SE, _("%s is not a variable name."), lex_tokid (lexer));
77 /* Parses a name as a variable within VS and returns the variable
78 if successful. On failure emits an error message and returns a null pointer. */
80 static struct variable *
81 parse_vs_variable (struct lexer *lexer, const struct var_set *vs)
/* Resolve the token to an index first, then map that index to the
   variable; a failed parse yields NULL. */
84 return parse_vs_variable_idx (lexer, vs, &idx) ? var_set_get_var (vs, idx) : NULL;
87 /* Parses a variable name in dictionary D and returns the
88 variable if successful. On failure emits an error message and
89 returns a null pointer. */
/* The parsing itself is done by parse_vs_variable() on a var_set
   created from D. */
91 parse_variable (struct lexer *lexer, const struct dictionary *d)
93 struct var_set *vs = var_set_create_from_dict (d);
94 struct variable *var = parse_vs_variable (lexer, vs);
99 /* Parses a set of variables from dictionary D given options
100 OPTS. Resulting list of variables stored in *VAR and the
101 number of variables into *CNT. Returns true only if successful. */
104 parse_variables (struct lexer *lexer, const struct dictionary *d,
105 struct variable ***var,
106 size_t *cnt, int opts)
/* Both output arguments are mandatory. */
112 assert (var != NULL);
113 assert (cnt != NULL);
/* Wrap D in a var_set, let parse_var_set_vars() do the parsing,
   then dispose of the wrapper. */
115 vs = var_set_create_from_dict (d);
116 success = parse_var_set_vars (lexer, vs, var, cnt, opts);
117 var_set_destroy (vs);
121 /* Parses a set of variables from dictionary DICT given options
122 OPTS. Resulting list of variables stored in *VARS and the
123 number of variables into *VAR_CNT. Returns true only if
124 successful. Same behavior as parse_variables, except that all
125 allocations are taken from the given POOL. */
/* In the code below, that means *VARS is handed to pool_register()
   with free as the callback.  OPTS must not include PV_APPEND;
   this is asserted.
   NOTE(review): the PV_APPEND comment in the body is cut off
   before its closing delimiter and should be completed. */
127 parse_variables_pool (struct lexer *lexer, struct pool *pool,
128 const struct dictionary *dict,
129 struct variable ***vars, size_t *var_cnt, int opts)
133 /* PV_APPEND is unsafe because parse_variables would free the
134 existing names on failure, but those names are presumably
135 already in the pool, which would attempt to re-free it
137 assert (!(opts & PV_APPEND));
139 retval = parse_variables (lexer, dict, vars, var_cnt, opts);
141 pool_register (pool, free, *vars);
145 /* Parses a variable name from VS. If successful, sets *IDX to
146 the variable's index in VS, *CLASS to the variable's
147 dictionary class, and returns true. Returns false on failure. */
150 parse_var_idx_class (struct lexer *lexer, const struct var_set *vs,
152 enum dict_class *class)
/* Parsing is delegated to parse_vs_variable_idx(), which is also
   what receives IDX. */
154 if (!parse_vs_variable_idx (lexer, vs, idx))
/* The dictionary class is derived from the variable's name. */
157 *class = dict_class_from_id (var_get_name (var_set_get_var (vs, *idx)));
161 /* Add the variable from VS with index IDX to the list of
162 variables V that has *NV elements and room for *MV.
163 Uses and updates INCLUDED to avoid duplicates if indicated by
164 PV_OPTS, which also affects what variables are allowed in appropriate ways. */
/* The body is one if/else-if chain: the first six branches each
   reject the variable with a diagnostic, and the last branch is
   the one that grows *V.  INCLUDED is indexed by IDX, the
   variable's position in VS. */
167 add_variable (struct variable ***v, size_t *nv, size_t *mv,
168 char *included, int pv_opts,
169 const struct var_set *vs, size_t idx)
171 struct variable *add = var_set_get_var (vs, idx);
172 const char *add_name = var_get_name (add);
/* Type restrictions requested through PV_NUMERIC and PV_STRING.
   NOTE(review): this first branch reports with SW while every
   other branch uses SE -- confirm that the difference is
   intended. */
174 if ((pv_opts & PV_NUMERIC) && !var_is_numeric (add))
175 msg (SW, _("%s is not a numeric variable. It will not be "
176 "included in the variable list."), add_name);
177 else if ((pv_opts & PV_STRING) && !var_is_alpha (add))
178 msg (SE, _("%s is not a string variable. It will not be "
179 "included in the variable list."), add_name);
/* Scratch variables are recognized from the variable's name. */
180 else if ((pv_opts & PV_NO_SCRATCH)
181 && dict_class_from_id (add_name) == DC_SCRATCH)
182 msg (SE, _("Scratch variables (such as %s) are not allowed "
/* Once the list has a first element, both PV_SAME_TYPE and
   PV_SAME_WIDTH require later variables to match its type. */
184 else if ((pv_opts & (PV_SAME_TYPE | PV_SAME_WIDTH)) && *nv
185 && var_get_type (add) != var_get_type ((*v)[0]))
186 msg (SE, _("%s and %s are not the same type. All variables in "
187 "this variable list must be of the same type. %s "
188 "will be omitted from the list."),
189 var_get_name ((*v)[0]), add_name, add_name);
/* PV_SAME_WIDTH additionally requires the first element's width.
   NOTE(review): "omttied" in the message below is a misspelling
   of "omitted" in a user-visible string. */
190 else if ((pv_opts & PV_SAME_WIDTH) && *nv
191 && var_get_width (add) != var_get_width ((*v)[0]))
192 msg (SE, _("%s and %s are string variables with different widths. "
193 "All variables in this variable list must have the "
194 "same width. %s will be omttied from the list."),
195 var_get_name ((*v)[0]), add_name, add_name);
/* Duplicate handling.  INCLUDED is read only when PV_NO_DUPLICATE
   is set or PV_DUPLICATE is clear; the || below short-circuits
   when PV_DUPLICATE is set. */
196 else if ((pv_opts & PV_NO_DUPLICATE) && included[idx])
197 msg (SE, _("Variable %s appears twice in variable list."), add_name);
198 else if ((pv_opts & PV_DUPLICATE) || !included[idx])
203 *v = xnrealloc (*v, *mv, sizeof **v);
/* INCLUDED may be a null pointer, hence the guard. */
206 if (included != NULL)
211 /* Adds the variables in VS with indexes FIRST_IDX through
212 LAST_IDX, inclusive, to the list of variables V that has *NV
213 elements and room for *MV. Uses and updates INCLUDED to avoid
214 duplicates if indicated by PV_OPTS, which also affects what
215 variables are allowed in appropriate ways. */
/* Only variables whose dictionary class, as determined from their
   names, equals CLASS are passed on to add_variable(); other
   variables in the index range are skipped.  Note that the index
   bounds are declared int here, whereas add_variable() takes a
   size_t index. */
217 add_variables (struct variable ***v, size_t *nv, size_t *mv, char *included,
219 const struct var_set *vs, int first_idx, int last_idx,
220 enum dict_class class)
224 for (i = first_idx; i <= last_idx; i++)
225 if (dict_class_from_id (var_get_name (var_set_get_var (vs, i))) == class)
226 add_variable (v, nv, mv, included, pv_opts, vs, i);
229 /* Note that if parse_var_set_vars() returns false, *v is free()'d.
230 Conversely, if parse_var_set_vars() returns true, then *nv is
231 nonzero and *v is non-NULL. */
/* Parses a variable list from VS into *V and *NV under the control
   of PV_OPTS.  Each list item is ALL, a single variable name, or
   a pair of names joined by TO; a comma may follow each item. */
233 parse_var_set_vars (struct lexer *lexer, const struct var_set *vs,
234 struct variable ***v, size_t *nv,
244 /* At most one of PV_NUMERIC, PV_STRING, PV_SAME_TYPE,
245 PV_SAME_WIDTH may be specified. */
246 assert (((pv_opts & PV_NUMERIC) != 0)
247 + ((pv_opts & PV_STRING) != 0)
248 + ((pv_opts & PV_SAME_TYPE) != 0)
249 + ((pv_opts & PV_SAME_WIDTH) != 0) <= 1);
251 /* PV_DUPLICATE and PV_NO_DUPLICATE are incompatible. */
252 assert (!(pv_opts & PV_DUPLICATE) || !(pv_opts & PV_NO_DUPLICATE));
/* Taken when the caller is not appending to an existing list. */
254 if (!(pv_opts & PV_APPEND))
/* Unless duplicates are allowed, keep one flag per variable in VS;
   each variable already in *V is looked up in VS by name. */
263 if (!(pv_opts & PV_DUPLICATE))
267 included = xcalloc (var_set_get_cnt (vs), sizeof *included);
268 for (i = 0; i < *nv; i++)
271 if (!var_set_lookup_var_idx (vs, var_get_name ((*v)[i]), &index))
/* ALL covers the whole index range of VS, restricted to variables
   of class DC_ORDINARY. */
281 if (lex_match (lexer, T_ALL))
282 add_variables (v, nv, &mv, included, pv_opts,
283 vs, 0, var_set_get_cnt (vs) - 1, DC_ORDINARY);
286 enum dict_class class;
289 if (!parse_var_idx_class (lexer, vs, &first_idx, &class))
/* A name that is not followed by TO stands for itself. */
292 if (!lex_match (lexer, T_TO))
293 add_variable (v, nv, &mv, included, pv_opts, vs, first_idx);
297 enum dict_class last_class;
298 struct variable *first_var, *last_var;
300 if (!parse_var_idx_class (lexer, vs, &last_idx, &last_class))
303 first_var = var_set_get_var (vs, first_idx);
304 last_var = var_set_get_var (vs, last_idx);
/* The range must run forward through VS. */
306 if (last_idx < first_idx)
308 const char *first_name = var_get_name (first_var);
309 const char *last_name = var_get_name (last_var);
310 msg (SE, _("%s TO %s is not valid syntax since %s "
311 "precedes %s in the dictionary."),
312 first_name, last_name, first_name, last_name);
/* Both endpoints must belong to the same dictionary class. */
316 if (class != last_class)
318 msg (SE, _("When using the TO keyword to specify several "
319 "variables, both variables must be from "
320 "the same variable dictionaries, of either "
321 "ordinary, scratch, or system variables. "
322 "%s is a %s variable, whereas %s is %s."),
323 var_get_name (first_var), dict_class_to_name (class),
324 var_get_name (last_var),
325 dict_class_to_name (last_class));
/* Only variables of the endpoints' class are taken from the
   range. */
329 add_variables (v, nv, &mv, included, pv_opts,
330 vs, first_idx, last_idx, class);
/* PV_SINGLE is tested after each list item. */
334 if (pv_opts & PV_SINGLE)
336 lex_match (lexer, ',');
/* Keep going while the next token is ALL or an identifier that
   names a variable in VS. */
338 while (lex_token (lexer) == T_ALL
339 || (lex_token (lexer) == T_ID && var_set_lookup_var (vs, lex_tokid (lexer)) != NULL));
355 /* Extracts a numeric suffix from variable name S, copying it
356 into string R. Sets *D to the length of R and *N to its numeric value. */
/* NOTE(review): the scan below starts at s + strlen (s) - 1, so it
   assumes S is not an empty string -- confirm against callers. */
359 extract_num (char *s, char *r, int *n, int *d)
363 /* Find first digit. */
/* Scan backward from the last character over trailing digits,
   never moving before the start of S. */
364 cp = s + strlen (s) - 1;
365 while (isdigit ((unsigned char) *cp) && cp > s)
/* Copy the leading cp - s bytes of S into R.  strncpy() does not
   append a terminator here, because S is at least cp - s bytes
   long. */
370 strncpy (r, s, cp - s);
373 /* Count initial zeros. */
/* Accumulate the digits into *N as a decimal number. */
382 while (isdigit ((unsigned char) *cp))
385 *n = (*n * 10) + (*cp - '0');
/* Both outputs being zero is reported as a misuse of the TO
   convention. */
390 if (*n == 0 && *d == 0)
392 msg (SE, _("incorrect use of TO convention"));
398 /* Parses a list of variable names according to the DATA LIST version
399 of the TO convention. */
/* Each parsed name is stored in *NAMES as a separate xstrdup()
   copy.  A pair of names joined by TO is expanded into one name
   per number from the first name's numeric suffix through the
   second's. */
401 parse_DATA_LIST_vars (struct lexer *lexer, char ***names, size_t *nnames, int pv_opts)
407 char name1[LONG_NAME_LEN + 1], name2[LONG_NAME_LEN + 1];
408 char root1[LONG_NAME_LEN + 1], root2[LONG_NAME_LEN + 1];
411 assert (names != NULL);
412 assert (nnames != NULL);
/* Only these four options are accepted. */
413 assert ((pv_opts & ~(PV_APPEND | PV_SINGLE
414 | PV_NO_SCRATCH | PV_NO_DUPLICATE)) == 0);
415 /* FIXME: PV_NO_DUPLICATE is not implemented. */
/* With PV_APPEND, continue after the *NNAMES entries already
   present. */
417 if (pv_opts & PV_APPEND)
418 nvar = mvar = *nnames;
/* NOTE(review): this message and the identical one further down
   are not wrapped in _(), unlike the same text in
   parse_vs_variable_idx() -- confirm whether they should be
   translatable. */
427 if (lex_token (lexer) != T_ID)
429 lex_error (lexer, "expecting variable name");
/* Scratch names are rejected when PV_NO_SCRATCH is set. */
432 if (dict_class_from_id (lex_tokid (lexer)) == DC_SCRATCH
433 && (pv_opts & PV_NO_SCRATCH))
435 msg (SE, _("Scratch variables not allowed here."));
/* NOTE(review): unbounded copy into a LONG_NAME_LEN + 1 byte
   buffer; presumably the lexer limits identifier length --
   confirm. */
438 strcpy (name1, lex_tokid (lexer));
440 if (lex_token (lexer) == T_TO)
443 if (lex_token (lexer) != T_ID)
445 lex_error (lexer, "expecting variable name");
448 strcpy (name2, lex_tokid (lexer));
/* Split both names into a root and a numeric suffix. */
451 if (!extract_num (name1, root1, &n1, &d1)
452 || !extract_num (name2, root2, &n2, &d2))
/* Roots are compared without regard to case. */
455 if (strcasecmp (root1, root2))
457 msg (SE, _("Prefixes don't match in use of TO convention."));
462 msg (SE, _("Bad bounds in use of TO convention."));
/* Make room for n2 - n1 + 1 more names, growing the capacity by
   that amount rounded up to a multiple of 16. */
468 if (mvar < nvar + (n2 - n1 + 1))
470 mvar += ROUND_UP (n2 - n1 + 1, 16);
471 *names = xnrealloc (*names, mvar, sizeof **names);
/* Each generated name is root1 followed by the number, zero-padded
   to at least d1 digits.
   NOTE(review): sprintf() is unbounded here; the buffer holds
   LONG_NAME_LEN + 1 bytes. */
474 for (n = n1; n <= n2; n++)
476 char name[LONG_NAME_LEN + 1];
477 sprintf (name, "%s%0*d", root1, d1, n);
478 (*names)[nvar] = xstrdup (name);
487 *names = xnrealloc (*names, mvar, sizeof **names);
/* A name not followed by TO is stored as is. */
489 (*names)[nvar++] = xstrdup (name1);
492 lex_match (lexer, ',');
494 if (pv_opts & PV_SINGLE)
/* The list continues for as long as the next token is an
   identifier. */
497 while (lex_token (lexer) == T_ID);
505 for (i = 0; i < nvar; i++)
514 /* Registers each of the NAMES[0...NNAMES - 1] in POOL, as well as NAMES itself. */
517 register_vars_pool (struct pool *pool, char **names, size_t nnames)
/* Each string is registered individually with free as the callback,
   followed by the array that holds them. */
521 for (i = 0; i < nnames; i++)
522 pool_register (pool, free, names[i]);
523 pool_register (pool, free, names);
526 /* Parses a list of variable names according to the DATA LIST
527 version of the TO convention. Same args as
528 parse_DATA_LIST_vars(), except that all allocations are taken
529 from the given POOL. */
/* In the code below, that means *NAMES and *NNAMES are passed to
   register_vars_pool() after parse_DATA_LIST_vars() has run.
   PV_OPTS must not include PV_APPEND; this is asserted.
   NOTE(review): the PV_APPEND comment in the body is cut off
   before its closing delimiter and should be completed. */
531 parse_DATA_LIST_vars_pool (struct lexer *lexer, struct pool *pool,
532 char ***names, size_t *nnames, int pv_opts)
536 /* PV_APPEND is unsafe because parse_DATA_LIST_vars would free
537 the existing names on failure, but those names are
538 presumably already in the pool, which would attempt to
540 assert (!(pv_opts & PV_APPEND));
542 retval = parse_DATA_LIST_vars (lexer, names, nnames, pv_opts);
544 register_vars_pool (pool, *names, *nnames);
548 /* Parses a list of variables where some of the variables may be
549 existing and the rest are to be created. Same args as
550 parse_DATA_LIST_vars(). */
/* DICT is the dictionary consulted to decide whether a name
   already exists. */
552 parse_mixed_vars (struct lexer *lexer, const struct dictionary *dict,
553 char ***names, size_t *nnames, int pv_opts)
557 assert (names != NULL);
558 assert (nnames != NULL);
/* PV_APPEND is the only option accepted here. */
559 assert ((pv_opts & ~PV_APPEND) == 0);
561 if (!(pv_opts & PV_APPEND))
/* Consume list items for as long as the next token is an
   identifier or ALL. */
566 while (lex_token (lexer) == T_ID || lex_token (lexer) == T_ALL)
/* ALL, or a name found in DICT, is parsed as existing variables
   with parse_variables(); the names of the resulting variables
   are then copied onto the end of *NAMES. */
568 if (lex_token (lexer) == T_ALL || dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
573 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
575 *names = xnrealloc (*names, *nnames + nv, sizeof **names);
576 for (i = 0; i < nv; i++)
577 (*names)[*nnames + i] = xstrdup (var_get_name (v[i]));
/* Any other identifier is parsed as new names, appended to what
   has been collected so far. */
581 else if (!parse_DATA_LIST_vars (lexer, names, nnames, PV_APPEND))
587 for (i = 0; i < *nnames; i++)
595 /* Parses a list of variables where some of the variables may be
596 existing and the rest are to be created. Same args as
597 parse_mixed_vars(), except that all allocations are taken
598 from the given POOL. */
/* In the code below, that means *NAMES and *NNAMES are passed to
   register_vars_pool() after parse_mixed_vars() has run.
   PV_OPTS must not include PV_APPEND; this is asserted.
   NOTE(review): the PV_APPEND comment in the body is cut off
   before its closing delimiter, and it names
   parse_mixed_vars_pool where the function being called is
   parse_mixed_vars. */
600 parse_mixed_vars_pool (struct lexer *lexer, const struct dictionary *dict, struct pool *pool,
601 char ***names, size_t *nnames, int pv_opts)
605 /* PV_APPEND is unsafe because parse_mixed_vars_pool would free
606 the existing names on failure, but those names are
607 presumably already in the pool, which would attempt to
609 assert (!(pv_opts & PV_APPEND));
611 retval = parse_mixed_vars (lexer, dict, names, nnames, pv_opts);
613 register_vars_pool (pool, *names, *nnames);
617 /* A set of variables. */
/* The members below are callbacks; the var_set_*() wrapper
   functions in this file dispatch through them. */
/* Returns the number of variables in the set. */
620 size_t (*get_cnt) (const struct var_set *);
/* Returns the variable at index IDX. */
621 struct variable *(*get_var) (const struct var_set *, size_t idx);
/* Looks a variable up by name and reports its index. */
622 bool (*lookup_var_idx) (const struct var_set *, const char *, size_t *);
/* Destroys the set. */
623 void (*destroy) (struct var_set *);
627 /* Returns the number of variables in VS. */
/* Dispatches to the get_cnt callback stored in VS. */
629 var_set_get_cnt (const struct var_set *vs)
633 return vs->get_cnt (vs);
636 /* Returns the variable with index IDX in VS.
637 IDX must be less than the number of variables in VS. */
638 static struct variable *
639 var_set_get_var (const struct var_set *vs, size_t idx)
/* The bound on IDX is checked with an assertion before the call is
   dispatched to the get_var callback. */
642 assert (idx < var_set_get_cnt (vs));
644 return vs->get_var (vs, idx);
647 /* Returns the variable in VS named NAME, or a null pointer if VS
648 contains no variable with that name. */
650 var_set_lookup_var (const struct var_set *vs, const char *name)
/* Find the index by name, then convert the index to a variable. */
653 return (var_set_lookup_var_idx (vs, name, &idx)
654 ? var_set_get_var (vs, idx)
658 /* If VS contains a variable named NAME, sets *IDX to its index
659 and returns true. Otherwise, returns false. */
661 var_set_lookup_var_idx (const struct var_set *vs, const char *name,
/* NAME must be non-null and no longer than LONG_NAME_LEN bytes;
   both conditions are asserted before the call is dispatched to
   the lookup_var_idx callback. */
665 assert (name != NULL);
666 assert (strlen (name) <= LONG_NAME_LEN);
668 return vs->lookup_var_idx (vs, name, idx);
/* Disposes of VS.  Code in this file calls it on sets obtained
   from var_set_create_from_dict() and from
   var_set_create_from_array().
   NOTE(review): presumably dispatches through vs->destroy --
   confirm against the body. */
673 var_set_destroy (struct var_set *vs)
679 /* Returns the number of variables in VS. */
/* For a dictionary-based set, aux holds the dictionary, and the
   count is the dictionary's variable count. */
681 dict_var_set_get_cnt (const struct var_set *vs)
683 struct dictionary *d = vs->aux;
685 return dict_get_var_cnt (d);
688 /* Returns the variable with index IDX in VS.
689 IDX must be less than the number of variables in VS. */
690 static struct variable *
691 dict_var_set_get_var (const struct var_set *vs, size_t idx)
/* IDX is passed straight through to the dictionary held in aux. */
693 struct dictionary *d = vs->aux;
695 return dict_get_var (d, idx);
698 /* If VS contains a variable named NAME, sets *IDX to its index
699 and returns true. Otherwise, returns false. */
701 dict_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
/* The name is looked up in the dictionary held in aux. */
704 struct dictionary *d = vs->aux;
705 struct variable *v = dict_lookup_var (d, name);
/* The index reported is the variable's dictionary index. */
708 *idx = var_get_dict_index (v);
/* Destroy callback for dictionary-based sets; it is installed by
   var_set_create_from_dict(). */
717 dict_var_set_destroy (struct var_set *vs)
722 /* Returns a variable set based on D. */
/* The set is allocated with xmalloc() and its callbacks are set to
   the dict_var_set_*() functions. */
724 var_set_create_from_dict (const struct dictionary *d)
726 struct var_set *vs = xmalloc (sizeof *vs);
727 vs->get_cnt = dict_var_set_get_cnt;
728 vs->get_var = dict_var_set_get_var;
729 vs->lookup_var_idx = dict_var_set_lookup_var_idx;
730 vs->destroy = dict_var_set_destroy;
/* The cast drops the const qualifier so that D can be stored in
   aux. */
731 vs->aux = (void *) d;
735 /* A variable set based on an array. */
/* var_set_create_from_array() allocates one of these and stores it
   in the aux member of the var_set it returns. */
738 struct variable *const *var;/* Array of variables. */
739 size_t var_cnt; /* Number of elements in var. */
740 struct hsh_table *name_tab; /* Hash from variable names to variables. */
743 /* Returns the number of variables in VS. */
/* For an array-based set, aux holds the struct array_var_set. */
745 array_var_set_get_cnt (const struct var_set *vs)
747 struct array_var_set *avs = vs->aux;
752 /* Returns the variable with index IDX in VS.
753 IDX must be less than the number of variables in VS. */
754 static struct variable *
755 array_var_set_get_var (const struct var_set *vs, size_t idx)
757 struct array_var_set *avs = vs->aux;
/* The array elements are const-qualified pointers; the cast yields
   the plain pointer type that the callback must return. */
759 return (struct variable *) avs->var[idx];
762 /* If VS contains a variable named NAME, sets *IDX to its index
763 and returns true. Otherwise, returns false. */
765 array_var_set_lookup_var_idx (const struct var_set *vs, const char *name,
768 struct array_var_set *avs = vs->aux;
769 struct variable *v, *const *vpp;
/* A variable named NAME is created to serve as the search key; the
   table is searched with a pointer to a variable pointer. */
771 v = var_create (name, 0);
772 vpp = hsh_find (avs->name_tab, &v);
/* The index is the offset of the found element from the start of
   the array. */
777 *idx = vpp - avs->var;
/* Destroy callback for array-based sets; it is installed by
   var_set_create_from_array(). */
786 array_var_set_destroy (struct var_set *vs)
788 struct array_var_set *avs = vs->aux;
/* The name hash table belongs to the set and is destroyed here. */
790 hsh_destroy (avs->name_tab);
795 /* Returns a variable set based on the VAR_CNT variables in VAR. */
/* The set and its struct array_var_set are allocated with
   xmalloc(), and the callbacks are set to the array_var_set_*()
   functions. */
798 var_set_create_from_array (struct variable *const *var, size_t var_cnt)
801 struct array_var_set *avs;
804 vs = xmalloc (sizeof *vs);
805 vs->get_cnt = array_var_set_get_cnt;
806 vs->get_var = array_var_set_get_var;
807 vs->lookup_var_idx = array_var_set_lookup_var_idx;
808 vs->destroy = array_var_set_destroy;
809 vs->aux = avs = xmalloc (sizeof *avs);
811 avs->var_cnt = var_cnt;
/* The name table is created with room for twice as many entries as
   there are variables, and compares and hashes its entries by
   variable name. */
812 avs->name_tab = hsh_create (2 * var_cnt,
813 compare_var_ptrs_by_name, hash_var_ptr_by_name,
/* The table entries are the addresses of the elements of VAR.  A
   non-null result from hsh_insert() makes this function destroy
   the set it was building.
   NOTE(review): presumably non-null means that name is already in
   the table -- confirm against hsh_insert(). */
815 for (i = 0; i < var_cnt; i++)
816 if (hsh_insert (avs->name_tab, (void *) &var[i]) != NULL)
818 var_set_destroy (vs);