1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include "data/case.h"
27 #include "data/dictionary.h"
28 #include "data/settings.h"
29 #include "data/variable.h"
30 #include "language/expressions/helpers.h"
31 #include "language/lexer/format-parser.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/i18n.h"
37 #include "libpspp/message.h"
38 #include "libpspp/misc.h"
39 #include "libpspp/pool.h"
40 #include "libpspp/str.h"
42 #include "gl/c-strcase.h"
43 #include "gl/xalloc.h"
47 /* Recursive descent parser in order of increasing precedence. */
48 typedef union any_node *parse_recursively_func (struct lexer *, struct expression *);
49 static parse_recursively_func parse_or, parse_and, parse_not;
50 static parse_recursively_func parse_rel, parse_add, parse_mul;
51 static parse_recursively_func parse_neg, parse_exp;
52 static parse_recursively_func parse_primary;
53 static parse_recursively_func parse_vector_element, parse_function;
55 /* Utility functions. */
56 static struct expression *expr_create (struct dataset *ds);
57 atom_type expr_node_returns (const union any_node *);
59 static const char *atom_type_name (atom_type);
60 static struct expression *finish_expression (union any_node *,
62 static bool type_check (const union any_node *, enum val_type expected_type);
63 static union any_node *allocate_unary_variable (struct expression *,
64 const struct variable *);
66 /* Public functions. */
68 /* Parses an expression of the given TYPE. If DS is nonnull then variables and
69 vectors within it may be referenced within the expression; otherwise, the
70 expression must not reference any variables or vectors. Returns the new
71 expression if successful or a null pointer otherwise. */
73 expr_parse (struct lexer *lexer, struct dataset *ds, enum val_type type)
75 assert (val_type_is_valid (type));
77 struct expression *e = expr_create (ds);
78 union any_node *n = parse_or (lexer, e);
79 if (!n || !type_check (n, type))
85 return finish_expression (expr_optimize (n, e), e);
88 /* Parses a boolean expression, otherwise similar to expr_parse(). */
90 expr_parse_bool (struct lexer *lexer, struct dataset *ds)
92 struct expression *e = expr_create (ds);
93 union any_node *n = parse_or (lexer, e);
100 atom_type actual_type = expr_node_returns (n);
101 if (actual_type == OP_number)
102 n = expr_allocate_binary (e, OP_NUM_TO_BOOLEAN, n,
103 expr_allocate_string (e, ss_empty ()));
104 else if (actual_type != OP_boolean)
106 msg (SE, _("Type mismatch: expression has %s type, "
107 "but a boolean value is required here."),
108 atom_type_name (actual_type));
113 return finish_expression (expr_optimize (n, e), e);
116 /* Parses a numeric expression that is intended to be assigned to newly created
117 variable NEW_VAR_NAME. (This allows for a better error message if the
118 expression is not numeric.) Otherwise similar to expr_parse(). */
120 expr_parse_new_variable (struct lexer *lexer, struct dataset *ds,
121 const char *new_var_name)
123 struct expression *e = expr_create (ds);
124 union any_node *n = parse_or (lexer, e);
131 atom_type actual_type = expr_node_returns (n);
132 if (actual_type != OP_number && actual_type != OP_boolean)
134 msg (SE, _("This command tries to create a new variable %s by assigning a "
135 "string value to it, but this is not supported. Use "
136 "the STRING command to create the new variable with the "
137 "correct width before assigning to it, e.g. STRING %s(A20)."),
138 new_var_name, new_var_name);
143 return finish_expression (expr_optimize (n, e), e);
146 /* Free expression E. */
148 expr_free (struct expression *e)
151 pool_destroy (e->expr_pool);
155 expr_parse_any (struct lexer *lexer, struct dataset *ds, bool optimize)
158 struct expression *e;
160 e = expr_create (ds);
161 n = parse_or (lexer, e);
169 n = expr_optimize (n, e);
170 return finish_expression (n, e);
173 /* Finishing up expression building. */
175 /* Height of an expression's stacks. */
178 int number_height; /* Height of number stack. */
179 int string_height; /* Height of string stack. */
182 /* Stack heights used by different kinds of arguments. */
183 static const struct stack_heights on_number_stack = {1, 0};
184 static const struct stack_heights on_string_stack = {0, 1};
185 static const struct stack_heights not_on_stack = {0, 0};
187 /* Returns the stack heights used by an atom of the given
189 static const struct stack_heights *
190 atom_type_stack (atom_type type)
192 assert (is_atom (type));
198 return &on_number_stack;
201 return &on_string_stack;
211 return ¬_on_stack;
218 /* Measures the stack height needed for node N, supposing that
219 the stack height is initially *HEIGHT and updating *HEIGHT to
220 the final stack height. Updates *MAX, if necessary, to
221 reflect the maximum intermediate or final height. */
223 measure_stack (const union any_node *n,
224 struct stack_heights *height, struct stack_heights *max)
226 const struct stack_heights *return_height;
228 if (is_composite (n->type))
230 struct stack_heights args;
234 for (i = 0; i < n->composite.n_args; i++)
235 measure_stack (n->composite.args[i], &args, max);
237 return_height = atom_type_stack (operations[n->type].returns);
240 return_height = atom_type_stack (n->type);
242 height->number_height += return_height->number_height;
243 height->string_height += return_height->string_height;
245 if (height->number_height > max->number_height)
246 max->number_height = height->number_height;
247 if (height->string_height > max->string_height)
248 max->string_height = height->string_height;
251 /* Allocates stacks within E sufficient for evaluating node N. */
253 allocate_stacks (union any_node *n, struct expression *e)
255 struct stack_heights initial = {0, 0};
256 struct stack_heights max = {0, 0};
258 measure_stack (n, &initial, &max);
259 e->number_stack = pool_alloc (e->expr_pool,
260 sizeof *e->number_stack * max.number_height);
261 e->string_stack = pool_alloc (e->expr_pool,
262 sizeof *e->string_stack * max.string_height);
265 /* Finalizes expression E for evaluating node N. */
266 static struct expression *
267 finish_expression (union any_node *n, struct expression *e)
269 /* Allocate stacks. */
270 allocate_stacks (n, e);
272 /* Output postfix representation. */
275 /* The eval_pool might have been used for allocating strings
276 during optimization. We need to keep those strings around
277 for all subsequent evaluations, so start a new eval_pool. */
278 e->eval_pool = pool_create_subpool (e->expr_pool);
283 /* Verifies that expression E, whose root node is *N, can be
284 converted to type EXPECTED_TYPE, inserting a conversion at *N
285 if necessary. Returns true if successful, false on failure. */
287 type_check (const union any_node *n, enum val_type expected_type)
289 atom_type actual_type = expr_node_returns (n);
291 switch (expected_type)
294 if (actual_type != OP_number && actual_type != OP_boolean)
296 msg (SE, _("Type mismatch: expression has %s type, "
297 "but a numeric value is required here."),
298 atom_type_name (actual_type));
304 if (actual_type != OP_string)
306 msg (SE, _("Type mismatch: expression has %s type, "
307 "but a string value is required here."),
308 atom_type_name (actual_type));
320 /* Recursive-descent expression parser. */
322 /* Considers whether *NODE may be coerced to type REQUIRED_TYPE.
323 Returns true if possible, false if disallowed.
325 If DO_COERCION is false, then *NODE is not modified and there
328 If DO_COERCION is true, we perform the coercion if possible,
329 modifying *NODE if necessary. If the coercion is not possible
330 then we free *NODE and set *NODE to a null pointer.
332 This function's interface is somewhat awkward. Use one of the
333 wrapper functions type_coercion(), type_coercion_assert(), or
334 is_coercible() instead. */
336 type_coercion_core (struct expression *e,
337 atom_type required_type,
338 union any_node **node,
339 const char *operator_name,
342 atom_type actual_type;
344 assert (!!do_coercion == (e != NULL));
347 /* Propagate error. Whatever caused the original error
348 already emitted an error message. */
352 actual_type = expr_node_returns (*node);
353 if (actual_type == required_type)
359 switch (required_type)
362 if (actual_type == OP_boolean)
364 /* To enforce strict typing rules, insert Boolean to
365 numeric "conversion". This conversion is a no-op,
366 so it will be removed later. */
368 *node = expr_allocate_unary (e, OP_BOOLEAN_TO_NUM, *node);
374 /* No coercion to string. */
378 if (actual_type == OP_number)
380 /* Convert numeric to boolean. */
383 union any_node *op_name;
385 op_name = expr_allocate_string (e, ss_cstr (operator_name));
386 *node = expr_allocate_binary (e, OP_NUM_TO_BOOLEAN, *node,
398 if ((*node)->type == OP_format
399 && fmt_check_input (&(*node)->format.f)
400 && fmt_check_type_compat (&(*node)->format.f, VAL_NUMERIC))
404 (*node)->type = OP_ni_format;
412 if ((*node)->type == OP_format
413 && fmt_check_output (&(*node)->format.f)
414 && fmt_check_type_compat (&(*node)->format.f, VAL_NUMERIC))
418 (*node)->type = OP_no_format;
425 if ((*node)->type == OP_NUM_VAR)
428 *node = (*node)->composite.args[0];
434 if ((*node)->type == OP_STR_VAR)
437 *node = (*node)->composite.args[0];
443 if ((*node)->type == OP_NUM_VAR || (*node)->type == OP_STR_VAR)
446 *node = (*node)->composite.args[0];
452 if ((*node)->type == OP_number
453 && floor ((*node)->number.n) == (*node)->number.n
454 && (*node)->number.n > 0 && (*node)->number.n < INT_MAX)
457 *node = expr_allocate_pos_int (e, (*node)->number.n);
468 msg (SE, _("Type mismatch while applying %s operator: "
469 "cannot convert %s to %s."),
471 atom_type_name (actual_type), atom_type_name (required_type));
477 /* Coerces *NODE to type REQUIRED_TYPE, and returns success. If
478 *NODE cannot be coerced to the desired type then we issue an
479 error message about operator OPERATOR_NAME and free *NODE. */
481 type_coercion (struct expression *e,
482 atom_type required_type, union any_node **node,
483 const char *operator_name)
485 return type_coercion_core (e, required_type, node, operator_name, true);
488 /* Coerces *NODE to type REQUIRED_TYPE.
489 Assert-fails if the coercion is disallowed. */
491 type_coercion_assert (struct expression *e,
492 atom_type required_type, union any_node **node)
494 int success = type_coercion_core (e, required_type, node, NULL, true);
498 /* Returns true if *NODE may be coerced to type REQUIRED_TYPE,
501 is_coercible (atom_type required_type, union any_node *const *node)
503 return type_coercion_core (NULL, required_type,
504 (union any_node **) node, NULL, false);
507 /* Returns true if ACTUAL_TYPE is a kind of REQUIRED_TYPE, false
510 is_compatible (atom_type required_type, atom_type actual_type)
512 return (required_type == actual_type
513 || (required_type == OP_var
514 && (actual_type == OP_num_var || actual_type == OP_str_var)));
517 /* How to parse an operator. */
520 int token; /* Token representing operator. */
521 operation_type type; /* Operation type representing operation. */
522 const char *name; /* Name of operator. */
525 /* Attempts to match the current token against the tokens for the
526 OP_CNT operators in OPS[]. If successful, returns true
527 and, if OPERATOR is non-null, sets *OPERATOR to the operator.
528 On failure, returns false and, if OPERATOR is non-null, sets
529 *OPERATOR to a null pointer. */
531 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops,
532 const struct operator **operator)
534 const struct operator *op;
536 for (op = ops; op < ops + n_ops; op++)
537 if (lex_token (lexer) == op->token)
539 if (op->token != T_NEG_NUM)
541 if (operator != NULL)
545 if (operator != NULL)
551 check_operator (const struct operator *op, int n_args, atom_type arg_type)
553 const struct operation *o;
557 o = &operations[op->type];
558 assert (o->n_args == n_args);
559 assert ((o->flags & OPF_ARRAY_OPERAND) == 0);
560 for (i = 0; i < n_args; i++)
561 assert (is_compatible (arg_type, o->args[i]));
566 check_binary_operators (const struct operator ops[], size_t n_ops,
571 for (i = 0; i < n_ops; i++)
572 check_operator (&ops[i], 2, arg_type);
577 get_operand_type (const struct operator *op)
579 return operations[op->type].args[0];
582 /* Parses a chain of left-associative operator/operand pairs.
583 There are OP_CNT operators, specified in OPS[]. The
584 operators' operands must all be the same type. The next
585 higher level is parsed by PARSE_NEXT_LEVEL. If CHAIN_WARNING
586 is non-null, then it will be issued as a warning if more than
587 one operator/operand pair is parsed. */
588 static union any_node *
589 parse_binary_operators (struct lexer *lexer, struct expression *e, union any_node *node,
590 const struct operator ops[], size_t n_ops,
591 parse_recursively_func *parse_next_level,
592 const char *chain_warning)
594 atom_type operand_type = get_operand_type (&ops[0]);
596 const struct operator *operator;
598 assert (check_binary_operators (ops, n_ops, operand_type));
602 for (op_count = 0; match_operator (lexer, ops, n_ops, &operator); op_count++)
606 /* Convert the left-hand side to type OPERAND_TYPE. */
607 if (!type_coercion (e, operand_type, &node, operator->name))
610 /* Parse the right-hand side and coerce to type
612 rhs = parse_next_level (lexer, e);
613 if (!type_coercion (e, operand_type, &rhs, operator->name))
615 node = expr_allocate_binary (e, operator->type, node, rhs);
618 if (op_count > 1 && chain_warning != NULL)
619 msg (SW, "%s", chain_warning);
624 static union any_node *
625 parse_inverting_unary_operator (struct lexer *lexer, struct expression *e,
626 const struct operator *op,
627 parse_recursively_func *parse_next_level)
629 union any_node *node;
632 check_operator (op, 1, get_operand_type (op));
635 while (match_operator (lexer, op, 1, NULL))
638 node = parse_next_level (lexer, e);
640 && type_coercion (e, get_operand_type (op), &node, op->name)
641 && op_count % 2 != 0)
642 return expr_allocate_unary (e, op->type, node);
647 /* Parses the OR level. */
648 static union any_node *
649 parse_or (struct lexer *lexer, struct expression *e)
651 static const struct operator op =
652 { T_OR, OP_OR, "logical disjunction (`OR')" };
654 return parse_binary_operators (lexer, e, parse_and (lexer, e), &op, 1, parse_and, NULL);
657 /* Parses the AND level. */
658 static union any_node *
659 parse_and (struct lexer *lexer, struct expression *e)
661 static const struct operator op =
662 { T_AND, OP_AND, "logical conjunction (`AND')" };
664 return parse_binary_operators (lexer, e, parse_not (lexer, e),
665 &op, 1, parse_not, NULL);
668 /* Parses the NOT level. */
669 static union any_node *
670 parse_not (struct lexer *lexer, struct expression *e)
672 static const struct operator op
673 = { T_NOT, OP_NOT, "logical negation (`NOT')" };
674 return parse_inverting_unary_operator (lexer, e, &op, parse_rel);
677 /* Parse relational operators. */
678 static union any_node *
679 parse_rel (struct lexer *lexer, struct expression *e)
681 const char *chain_warning =
682 _("Chaining relational operators (e.g. `a < b < c') will "
683 "not produce the mathematically expected result. "
684 "Use the AND logical operator to fix the problem "
685 "(e.g. `a < b AND b < c'). "
686 "If chaining is really intended, parentheses will disable "
687 "this warning (e.g. `(a < b) < c'.)");
689 union any_node *node = parse_add (lexer, e);
694 switch (expr_node_returns (node))
699 static const struct operator ops[] =
701 { T_EQUALS, OP_EQ, "numeric equality (`=')" },
702 { T_EQ, OP_EQ, "numeric equality (`EQ')" },
703 { T_GE, OP_GE, "numeric greater-than-or-equal-to (`>=')" },
704 { T_GT, OP_GT, "numeric greater than (`>')" },
705 { T_LE, OP_LE, "numeric less-than-or-equal-to (`<=')" },
706 { T_LT, OP_LT, "numeric less than (`<')" },
707 { T_NE, OP_NE, "numeric inequality (`<>')" },
710 return parse_binary_operators (lexer, e, node, ops,
711 sizeof ops / sizeof *ops,
712 parse_add, chain_warning);
717 static const struct operator ops[] =
719 { T_EQUALS, OP_EQ_STRING, "string equality (`=')" },
720 { T_EQ, OP_EQ_STRING, "string equality (`EQ')" },
721 { T_GE, OP_GE_STRING, "string greater-than-or-equal-to (`>=')" },
722 { T_GT, OP_GT_STRING, "string greater than (`>')" },
723 { T_LE, OP_LE_STRING, "string less-than-or-equal-to (`<=')" },
724 { T_LT, OP_LT_STRING, "string less than (`<')" },
725 { T_NE, OP_NE_STRING, "string inequality (`<>')" },
728 return parse_binary_operators (lexer, e, node, ops,
729 sizeof ops / sizeof *ops,
730 parse_add, chain_warning);
738 /* Parses the addition and subtraction level. */
739 static union any_node *
740 parse_add (struct lexer *lexer, struct expression *e)
742 static const struct operator ops[] =
744 { T_PLUS, OP_ADD, "addition (`+')" },
745 { T_DASH, OP_SUB, "subtraction (`-')" },
746 { T_NEG_NUM, OP_ADD, "subtraction (`-')" },
749 return parse_binary_operators (lexer, e, parse_mul (lexer, e),
750 ops, sizeof ops / sizeof *ops,
754 /* Parses the multiplication and division level. */
755 static union any_node *
756 parse_mul (struct lexer *lexer, struct expression *e)
758 static const struct operator ops[] =
760 { T_ASTERISK, OP_MUL, "multiplication (`*')" },
761 { T_SLASH, OP_DIV, "division (`/')" },
764 return parse_binary_operators (lexer, e, parse_neg (lexer, e),
765 ops, sizeof ops / sizeof *ops,
769 /* Parses the unary minus level. */
770 static union any_node *
771 parse_neg (struct lexer *lexer, struct expression *e)
773 static const struct operator op = { T_DASH, OP_NEG, "negation (`-')" };
774 return parse_inverting_unary_operator (lexer, e, &op, parse_exp);
777 static union any_node *
778 parse_exp (struct lexer *lexer, struct expression *e)
780 static const struct operator op =
781 { T_EXP, OP_POW, "exponentiation (`**')" };
783 const char *chain_warning =
784 _("The exponentiation operator (`**') is left-associative, "
785 "even though right-associative semantics are more useful. "
786 "That is, `a**b**c' equals `(a**b)**c', not as `a**(b**c)'. "
787 "To disable this warning, insert parentheses.");
789 union any_node *lhs, *node;
790 bool negative = false;
792 if (lex_token (lexer) == T_NEG_NUM)
794 lhs = expr_allocate_number (e, -lex_tokval (lexer));
799 lhs = parse_primary (lexer, e);
801 node = parse_binary_operators (lexer, e, lhs, &op, 1,
802 parse_primary, chain_warning);
803 return negative ? expr_allocate_unary (e, OP_NEG, node) : node;
806 /* Parses system variables. */
807 static union any_node *
808 parse_sysvar (struct lexer *lexer, struct expression *e)
810 if (lex_match_id (lexer, "$CASENUM"))
811 return expr_allocate_nullary (e, OP_CASENUM);
812 else if (lex_match_id (lexer, "$DATE"))
814 static const char *months[12] =
816 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
817 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
820 time_t last_proc_time = time_of_last_procedure (e->ds);
825 time = localtime (&last_proc_time);
826 sprintf (temp_buf, "%02d %s %02d", abs (time->tm_mday) % 100,
827 months[abs (time->tm_mon) % 12], abs (time->tm_year) % 100);
829 ss_alloc_substring (&s, ss_cstr (temp_buf));
830 return expr_allocate_string (e, s);
832 else if (lex_match_id (lexer, "$TRUE"))
833 return expr_allocate_boolean (e, 1.0);
834 else if (lex_match_id (lexer, "$FALSE"))
835 return expr_allocate_boolean (e, 0.0);
836 else if (lex_match_id (lexer, "$SYSMIS"))
837 return expr_allocate_number (e, SYSMIS);
838 else if (lex_match_id (lexer, "$JDATE"))
840 time_t time = time_of_last_procedure (e->ds);
841 struct tm *tm = localtime (&time);
842 return expr_allocate_number (e, expr_ymd_to_ofs (tm->tm_year + 1900,
846 else if (lex_match_id (lexer, "$TIME"))
848 time_t time = time_of_last_procedure (e->ds);
849 struct tm *tm = localtime (&time);
850 return expr_allocate_number (e,
851 expr_ymd_to_date (tm->tm_year + 1900,
854 + tm->tm_hour * 60 * 60.
858 else if (lex_match_id (lexer, "$LENGTH"))
859 return expr_allocate_number (e, settings_get_viewlength ());
860 else if (lex_match_id (lexer, "$WIDTH"))
861 return expr_allocate_number (e, settings_get_viewwidth ());
864 msg (SE, _("Unknown system variable %s."), lex_tokcstr (lexer));
869 /* Parses numbers, varnames, etc. */
870 static union any_node *
871 parse_primary (struct lexer *lexer, struct expression *e)
873 switch (lex_token (lexer))
876 if (lex_next_token (lexer, 1) == T_LPAREN)
878 /* An identifier followed by a left parenthesis may be
879 a vector element reference. If not, it's a function
881 if (e->ds != NULL && dict_lookup_vector (dataset_dict (e->ds), lex_tokcstr (lexer)) != NULL)
882 return parse_vector_element (lexer, e);
884 return parse_function (lexer, e);
886 else if (lex_tokcstr (lexer)[0] == '$')
888 /* $ at the beginning indicates a system variable. */
889 return parse_sysvar (lexer, e);
891 else if (e->ds != NULL && dict_lookup_var (dataset_dict (e->ds), lex_tokcstr (lexer)))
893 /* It looks like a user variable.
894 (It could be a format specifier, but we'll assume
895 it's a variable unless proven otherwise. */
896 return allocate_unary_variable (e, parse_variable (lexer, dataset_dict (e->ds)));
900 /* Try to parse it as a format specifier. */
905 ok = parse_format_specifier (lexer, &fmt);
909 return expr_allocate_format (e, &fmt);
911 /* All attempts failed. */
912 msg (SE, _("Unknown identifier %s."), lex_tokcstr (lexer));
920 union any_node *node = expr_allocate_number (e, lex_tokval (lexer));
927 const char *dict_encoding;
928 union any_node *node;
931 dict_encoding = (e->ds != NULL
932 ? dict_get_encoding (dataset_dict (e->ds))
934 s = recode_string_pool (dict_encoding, "UTF-8", lex_tokcstr (lexer),
935 ss_length (lex_tokss (lexer)), e->expr_pool);
936 node = expr_allocate_string (e, ss_cstr (s));
944 /* Count number of left parentheses so that we can match them against
945 an equal number of right parentheses. This defeats trivial attempts
946 to exhaust the stack with a lot of left parentheses. (More
947 sophisticated attacks will still succeed.) */
949 while (lex_match (lexer, T_LPAREN))
952 union any_node *node = parse_or (lexer, e);
956 for (size_t i = 0; i < n; i++)
957 if (!lex_force_match (lexer, T_RPAREN))
964 lex_error (lexer, NULL);
969 static union any_node *
970 parse_vector_element (struct lexer *lexer, struct expression *e)
972 const struct vector *vector;
973 union any_node *element;
975 /* Find vector, skip token.
976 The caller must already have verified that the current token
977 is the name of a vector. */
978 vector = dict_lookup_vector (dataset_dict (e->ds), lex_tokcstr (lexer));
979 assert (vector != NULL);
982 /* Skip left parenthesis token.
983 The caller must have verified that the lookahead is a left
985 assert (lex_token (lexer) == T_LPAREN);
988 element = parse_or (lexer, e);
989 if (!type_coercion (e, OP_number, &element, "vector indexing")
990 || !lex_match (lexer, T_RPAREN))
993 return expr_allocate_binary (e, (vector_get_type (vector) == VAL_NUMERIC
994 ? OP_VEC_ELEM_NUM : OP_VEC_ELEM_STR),
995 element, expr_allocate_vector (e, vector));
998 /* Individual function parsing. */
1000 const struct operation operations[OP_first + n_OP] = {
1001 #include "parse.inc"
1005 word_matches (const char **test, const char **name)
1007 size_t test_len = strcspn (*test, ".");
1008 size_t name_len = strcspn (*name, ".");
1009 if (test_len == name_len)
1011 if (buf_compare_case (*test, *name, test_len))
1014 else if (test_len < 3 || test_len > name_len)
1018 if (buf_compare_case (*test, *name, test_len))
1024 if (**test != **name)
1035 /* Returns 0 if TOKEN and FUNC do not match,
1036 1 if TOKEN is an acceptable abbreviation for FUNC,
1037 2 if TOKEN equals FUNC. */
1039 compare_function_names (const char *token_, const char *func_)
1041 const char *token = token_;
1042 const char *func = func_;
1043 while (*token || *func)
1044 if (!word_matches (&token, &func))
1046 return !c_strcasecmp (token_, func_) ? 2 : 1;
1050 lookup_function (const char *token,
1051 const struct operation **first,
1052 const struct operation **last)
1054 *first = *last = NULL;
1055 const struct operation *best = NULL;
1057 for (const struct operation *f = operations + OP_function_first;
1058 f <= operations + OP_function_last; f++)
1060 int score = compare_function_names (token, f->name);
1066 else if (score == 1 && !(f->flags & OPF_NO_ABBREV) && !best)
1075 const struct operation *f = best;
1076 while (f <= operations + OP_function_last
1077 && !c_strcasecmp (f->name, best->name))
1085 extract_min_valid (const char *s)
1087 char *p = strrchr (s, '.');
1089 || p[1] < '0' || p[1] > '9'
1090 || strspn (p + 1, "0123456789") != strlen (p + 1))
1093 return atoi (p + 1);
1097 function_arg_type (const struct operation *f, size_t arg_idx)
1099 assert (arg_idx < f->n_args || (f->flags & OPF_ARRAY_OPERAND));
1101 return f->args[arg_idx < f->n_args ? arg_idx : f->n_args - 1];
1105 match_function (union any_node **args, int n_args, const struct operation *f)
1109 if (n_args < f->n_args
1110 || (n_args > f->n_args && (f->flags & OPF_ARRAY_OPERAND) == 0)
1111 || n_args - (f->n_args - 1) < f->array_min_elems)
1114 for (i = 0; i < n_args; i++)
1115 if (!is_coercible (function_arg_type (f, i), &args[i]))
1122 coerce_function_args (struct expression *e, const struct operation *f,
1123 union any_node **args, size_t n_args)
1127 for (i = 0; i < n_args; i++)
1128 type_coercion_assert (e, function_arg_type (f, i), &args[i]);
1132 validate_function_args (const struct operation *f, int n_args, int min_valid)
1134 /* Count the function arguments that go into the trailing array (if any). We
1135 know that there must be at least the minimum number because
1136 match_function() already checked. */
1137 int array_n_args = n_args - (f->n_args - 1);
1138 assert (array_n_args >= f->array_min_elems);
1140 if ((f->flags & OPF_ARRAY_OPERAND)
1141 && array_n_args % f->array_granularity != 0)
1143 /* RANGE is the only case we have so far. It has paired arguments with
1144 one initial argument, and that's the only special case we deal with
1146 assert (f->array_granularity == 2);
1147 assert (n_args % 2 == 0);
1148 msg (SE, _("%s must have an odd number of arguments."), f->prototype);
1152 if (min_valid != -1)
1154 if (f->array_min_elems == 0)
1156 assert ((f->flags & OPF_MIN_VALID) == 0);
1157 msg (SE, _("%s function cannot accept suffix .%d to specify the "
1158 "minimum number of valid arguments."),
1159 f->prototype, min_valid);
1164 assert (f->flags & OPF_MIN_VALID);
1165 if (min_valid > array_n_args)
1167 msg (SE, _("For %s with %d arguments, at most %d (not %d) may be "
1168 "required to be valid."),
1169 f->prototype, n_args, array_n_args, min_valid);
1179 add_arg (union any_node ***args, int *n_args, int *allocated_args,
1180 union any_node *arg)
1182 if (*n_args >= *allocated_args)
1184 *allocated_args += 8;
1185 *args = xrealloc (*args, sizeof **args * *allocated_args);
1188 (*args)[(*n_args)++] = arg;
1192 put_invocation (struct string *s,
1193 const char *func_name, union any_node **args, size_t n_args)
1197 ds_put_format (s, "%s(", func_name);
1198 for (i = 0; i < n_args; i++)
1201 ds_put_cstr (s, ", ");
1202 ds_put_cstr (s, operations[expr_node_returns (args[i])].prototype);
1204 ds_put_byte (s, ')');
1208 no_match (const char *func_name,
1209 union any_node **args, size_t n_args,
1210 const struct operation *first, const struct operation *last)
1213 const struct operation *f;
1217 if (last - first == 1)
1219 ds_put_format (&s, _("Type mismatch invoking %s as "), first->prototype);
1220 put_invocation (&s, func_name, args, n_args);
1224 ds_put_cstr (&s, _("Function invocation "));
1225 put_invocation (&s, func_name, args, n_args);
1226 ds_put_cstr (&s, _(" does not match any known function. Candidates are:"));
1228 for (f = first; f < last; f++)
1229 ds_put_format (&s, "\n%s", f->prototype);
1231 ds_put_byte (&s, '.');
1233 msg (SE, "%s", ds_cstr (&s));
1238 static union any_node *
1239 parse_function (struct lexer *lexer, struct expression *e)
1242 const struct operation *f, *first, *last;
1244 union any_node **args = NULL;
1246 int allocated_args = 0;
1248 struct string func_name;
1252 ds_init_substring (&func_name, lex_tokss (lexer));
1253 min_valid = extract_min_valid (lex_tokcstr (lexer));
1254 if (!lookup_function (lex_tokcstr (lexer), &first, &last))
1256 msg (SE, _("No function or vector named %s."), lex_tokcstr (lexer));
1257 ds_destroy (&func_name);
1262 if (!lex_force_match (lexer, T_LPAREN))
1264 ds_destroy (&func_name);
1269 n_args = allocated_args = 0;
1270 if (lex_token (lexer) != T_RPAREN)
1273 if (lex_token (lexer) == T_ID
1274 && lex_next_token (lexer, 1) == T_TO)
1276 const struct variable **vars;
1280 if (!parse_variables_const (lexer, dataset_dict (e->ds), &vars, &n_vars, PV_SINGLE))
1282 for (i = 0; i < n_vars; i++)
1283 add_arg (&args, &n_args, &allocated_args,
1284 allocate_unary_variable (e, vars[i]));
1289 union any_node *arg = parse_or (lexer, e);
1293 add_arg (&args, &n_args, &allocated_args, arg);
1295 if (lex_match (lexer, T_RPAREN))
1297 else if (!lex_match (lexer, T_COMMA))
1299 lex_error_expecting (lexer, "`,'", "`)'");
1304 for (f = first; f < last; f++)
1305 if (match_function (args, n_args, f))
1309 no_match (ds_cstr (&func_name), args, n_args, first, last);
1313 coerce_function_args (e, f, args, n_args);
1314 if (!validate_function_args (f, n_args, min_valid))
1317 if ((f->flags & OPF_EXTENSION) && settings_get_syntax () == COMPATIBLE)
1318 msg (SW, _("%s is a PSPP extension."), f->prototype);
1319 if (f->flags & OPF_UNIMPLEMENTED)
1321 msg (SE, _("%s is not available in this version of PSPP."),
1325 if ((f->flags & OPF_PERM_ONLY) &&
1326 proc_in_temporary_transformations (e->ds))
1328 msg (SE, _("%s may not appear after %s."), f->prototype, "TEMPORARY");
1332 n = expr_allocate_composite (e, f - operations, args, n_args);
1333 n->composite.min_valid = min_valid != -1 ? min_valid : f->array_min_elems;
1335 if (n->type == OP_LAG_Vn || n->type == OP_LAG_Vs)
1336 dataset_need_lag (e->ds, 1);
1337 else if (n->type == OP_LAG_Vnn || n->type == OP_LAG_Vsn)
1340 assert (n->composite.n_args == 2);
1341 assert (n->composite.args[1]->type == OP_pos_int);
1342 n_before = n->composite.args[1]->integer.i;
1343 dataset_need_lag (e->ds, n_before);
1347 ds_destroy (&func_name);
1352 ds_destroy (&func_name);
1356 /* Utility functions. */
1358 static struct expression *
1359 expr_create (struct dataset *ds)
1361 struct pool *pool = pool_create ();
1362 struct expression *e = pool_alloc (pool, sizeof *e);
1363 e->expr_pool = pool;
1365 e->eval_pool = pool_create_subpool (e->expr_pool);
1368 e->n_ops = e->allocated_ops = 0;
1373 expr_node_returns (const union any_node *n)
1376 assert (is_operation (n->type));
1377 if (is_atom (n->type))
1379 else if (is_composite (n->type))
1380 return operations[n->type].returns;
1386 atom_type_name (atom_type type)
1388 assert (is_atom (type));
1389 return operations[type].name;
1393 expr_allocate_nullary (struct expression *e, operation_type op)
1395 return expr_allocate_composite (e, op, NULL, 0);
1399 expr_allocate_unary (struct expression *e, operation_type op,
1400 union any_node *arg0)
1402 return expr_allocate_composite (e, op, &arg0, 1);
1406 expr_allocate_binary (struct expression *e, operation_type op,
1407 union any_node *arg0, union any_node *arg1)
1409 union any_node *args[2];
1412 return expr_allocate_composite (e, op, args, 2);
1416 is_valid_node (union any_node *n)
1418 const struct operation *op;
1422 assert (is_operation (n->type));
1423 op = &operations[n->type];
1425 if (!is_atom (n->type))
1427 struct composite_node *c = &n->composite;
1429 assert (is_composite (n->type));
1430 assert (c->n_args >= op->n_args);
1431 for (i = 0; i < op->n_args; i++)
1432 assert (is_compatible (op->args[i], expr_node_returns (c->args[i])));
1433 if (c->n_args > op->n_args && !is_operator (n->type))
1435 assert (op->flags & OPF_ARRAY_OPERAND);
1436 for (i = 0; i < c->n_args; i++)
1437 assert (is_compatible (op->args[op->n_args - 1],
1438 expr_node_returns (c->args[i])));
1446 expr_allocate_composite (struct expression *e, operation_type op,
1447 union any_node **args, size_t n_args)
1452 n = pool_alloc (e->expr_pool, sizeof n->composite);
1454 n->composite.n_args = n_args;
1455 n->composite.args = pool_alloc (e->expr_pool,
1456 sizeof *n->composite.args * n_args);
1457 for (i = 0; i < n_args; i++)
1459 if (args[i] == NULL)
1461 n->composite.args[i] = args[i];
1463 memcpy (n->composite.args, args, sizeof *n->composite.args * n_args);
1464 n->composite.min_valid = 0;
1465 assert (is_valid_node (n));
1470 expr_allocate_number (struct expression *e, double d)
1472 union any_node *n = pool_alloc (e->expr_pool, sizeof n->number);
1473 n->type = OP_number;
1479 expr_allocate_boolean (struct expression *e, double b)
1481 union any_node *n = pool_alloc (e->expr_pool, sizeof n->number);
1482 assert (b == 0.0 || b == 1.0 || b == SYSMIS);
1483 n->type = OP_boolean;
1489 expr_allocate_integer (struct expression *e, int i)
1491 union any_node *n = pool_alloc (e->expr_pool, sizeof n->integer);
1492 n->type = OP_integer;
1498 expr_allocate_pos_int (struct expression *e, int i)
1500 union any_node *n = pool_alloc (e->expr_pool, sizeof n->integer);
1502 n->type = OP_pos_int;
1508 expr_allocate_vector (struct expression *e, const struct vector *vector)
1510 union any_node *n = pool_alloc (e->expr_pool, sizeof n->vector);
1511 n->type = OP_vector;
1512 n->vector.v = vector;
1517 expr_allocate_string (struct expression *e, struct substring s)
1519 union any_node *n = pool_alloc (e->expr_pool, sizeof n->string);
1520 n->type = OP_string;
1526 expr_allocate_variable (struct expression *e, const struct variable *v)
1528 union any_node *n = pool_alloc (e->expr_pool, sizeof n->variable);
1529 n->type = var_is_numeric (v) ? OP_num_var : OP_str_var;
1535 expr_allocate_format (struct expression *e, const struct fmt_spec *format)
1537 union any_node *n = pool_alloc (e->expr_pool, sizeof n->format);
1538 n->type = OP_format;
1539 n->format.f = *format;
1543 /* Allocates a unary composite node that represents the value of
1544 variable V in expression E. */
1545 static union any_node *
1546 allocate_unary_variable (struct expression *e, const struct variable *v)
1549 return expr_allocate_unary (e, var_is_numeric (v) ? OP_NUM_VAR : OP_STR_VAR,
1550 expr_allocate_variable (e, v));
1553 /* Export function details to other modules. */
1555 /* Returns the operation structure for the function with the
1557 const struct operation *
1558 expr_get_function (size_t idx)
1560 assert (idx < n_OP_function);
1561 return &operations[OP_function_first + idx];
1564 /* Returns the number of expression functions. */
1566 expr_get_n_functions (void)
1568 return n_OP_function;
1571 /* Returns the name of operation OP. */
1573 expr_operation_get_name (const struct operation *op)
1578 /* Returns the human-readable prototype for operation OP. */
1580 expr_operation_get_prototype (const struct operation *op)
1582 return op->prototype;
1585 /* Returns the number of arguments for operation OP. */
1587 expr_operation_get_n_args (const struct operation *op)