1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
26 #include "data/case.h"
27 #include "data/dictionary.h"
28 #include "data/settings.h"
29 #include "data/variable.h"
30 #include "language/expressions/helpers.h"
31 #include "language/lexer/format-parser.h"
32 #include "language/lexer/lexer.h"
33 #include "language/lexer/variable-parser.h"
34 #include "libpspp/array.h"
35 #include "libpspp/assertion.h"
36 #include "libpspp/i18n.h"
37 #include "libpspp/message.h"
38 #include "libpspp/misc.h"
39 #include "libpspp/pool.h"
40 #include "libpspp/str.h"
42 #include "gl/c-strcase.h"
43 #include "gl/xalloc.h"
47 /* Recursive descent parser in order of increasing precedence. */
48 typedef union any_node *parse_recursively_func (struct lexer *, struct expression *);
49 static parse_recursively_func parse_or, parse_and, parse_not;
50 static parse_recursively_func parse_rel, parse_add, parse_mul;
51 static parse_recursively_func parse_neg, parse_exp;
52 static parse_recursively_func parse_primary;
53 static parse_recursively_func parse_vector_element, parse_function;
55 /* Utility functions. */
56 static struct expression *expr_create (struct dataset *ds);
57 atom_type expr_node_returns (const union any_node *);
59 static const char *atom_type_name (atom_type);
60 static struct expression *finish_expression (union any_node *,
62 static bool type_check (const union any_node *, enum val_type expected_type);
63 static union any_node *allocate_unary_variable (struct expression *,
64 const struct variable *);
66 /* Public functions. */
68 /* Parses an expression of the given TYPE. If DS is nonnull then variables and
69 vectors within it may be referenced within the expression; otherwise, the
70 expression must not reference any variables or vectors. Returns the new
71 expression if successful or a null pointer otherwise. If POOL is nonnull,
72 then destroying POOL will free the expression; otherwise, the caller must
73 eventually free it with expr_free(). */
75 expr_parse (struct lexer *lexer, struct pool *pool, struct dataset *ds,
78 assert (val_type_is_valid (type));
80 struct expression *e = expr_create (ds);
81 union any_node *n = parse_or (lexer, e);
82 if (!n || !type_check (n, type))
88 e = finish_expression (expr_optimize (n, e), e);
90 pool_add_subpool (pool, e->expr_pool);
94 /* Parses a boolean expression, otherwise similar to expr_parse(). */
96 expr_parse_bool (struct lexer *lexer, struct pool *pool, struct dataset *ds)
98 struct expression *e = expr_create (ds);
99 union any_node *n = parse_or (lexer, e);
106 atom_type actual_type = expr_node_returns (n);
107 if (actual_type == OP_number)
108 n = expr_allocate_binary (e, OP_NUM_TO_BOOLEAN, n,
109 expr_allocate_string (e, ss_empty ()));
110 else if (actual_type != OP_boolean)
112 msg (SE, _("Type mismatch: expression has %s type, "
113 "but a boolean value is required here."),
114 atom_type_name (actual_type));
119 e = finish_expression (expr_optimize (n, e), e);
121 pool_add_subpool (pool, e->expr_pool);
125 /* Parses a numeric expression that is intended to be assigned to newly created
126 variable NEW_VAR_NAME. (This allows for a better error message if the
127 expression is not numeric.) Otherwise similar to expr_parse(). */
129 expr_parse_new_variable (struct lexer *lexer, struct pool *pool, struct dataset *ds,
130 const char *new_var_name)
132 struct expression *e = expr_create (ds);
133 union any_node *n = parse_or (lexer, e);
140 atom_type actual_type = expr_node_returns (n);
141 if (actual_type != OP_number && actual_type != OP_boolean)
143 msg (SE, _("This command tries to create a new variable %s by assigning a "
144 "string value to it, but this is not supported. Use "
145 "the STRING command to create the new variable with the "
146 "correct width before assigning to it, e.g. STRING %s(A20)."),
147 new_var_name, new_var_name);
152 e = finish_expression (expr_optimize (n, e), e);
154 pool_add_subpool (pool, e->expr_pool);
158 /* Free expression E. */
160 expr_free (struct expression *e)
163 pool_destroy (e->expr_pool);
167 expr_parse_any (struct lexer *lexer, struct dataset *ds, bool optimize)
170 struct expression *e;
172 e = expr_create (ds);
173 n = parse_or (lexer, e);
181 n = expr_optimize (n, e);
182 return finish_expression (n, e);
185 /* Finishing up expression building. */
187 /* Height of an expression's stacks. */
190 int number_height; /* Height of number stack. */
191 int string_height; /* Height of string stack. */
194 /* Stack heights used by different kinds of arguments. */
195 static const struct stack_heights on_number_stack = {1, 0};
196 static const struct stack_heights on_string_stack = {0, 1};
197 static const struct stack_heights not_on_stack = {0, 0};
199 /* Returns the stack heights used by an atom of the given
201 static const struct stack_heights *
202 atom_type_stack (atom_type type)
204 assert (is_atom (type));
210 return &on_number_stack;
213 return &on_string_stack;
223 return ¬_on_stack;
230 /* Measures the stack height needed for node N, supposing that
231 the stack height is initially *HEIGHT and updating *HEIGHT to
232 the final stack height. Updates *MAX, if necessary, to
233 reflect the maximum intermediate or final height. */
235 measure_stack (const union any_node *n,
236 struct stack_heights *height, struct stack_heights *max)
238 const struct stack_heights *return_height;
240 if (is_composite (n->type))
242 struct stack_heights args;
246 for (i = 0; i < n->composite.n_args; i++)
247 measure_stack (n->composite.args[i], &args, max);
249 return_height = atom_type_stack (operations[n->type].returns);
252 return_height = atom_type_stack (n->type);
254 height->number_height += return_height->number_height;
255 height->string_height += return_height->string_height;
257 if (height->number_height > max->number_height)
258 max->number_height = height->number_height;
259 if (height->string_height > max->string_height)
260 max->string_height = height->string_height;
263 /* Allocates stacks within E sufficient for evaluating node N. */
265 allocate_stacks (union any_node *n, struct expression *e)
267 struct stack_heights initial = {0, 0};
268 struct stack_heights max = {0, 0};
270 measure_stack (n, &initial, &max);
271 e->number_stack = pool_alloc (e->expr_pool,
272 sizeof *e->number_stack * max.number_height);
273 e->string_stack = pool_alloc (e->expr_pool,
274 sizeof *e->string_stack * max.string_height);
277 /* Finalizes expression E for evaluating node N. */
278 static struct expression *
279 finish_expression (union any_node *n, struct expression *e)
281 /* Allocate stacks. */
282 allocate_stacks (n, e);
284 /* Output postfix representation. */
287 /* The eval_pool might have been used for allocating strings
288 during optimization. We need to keep those strings around
289 for all subsequent evaluations, so start a new eval_pool. */
290 e->eval_pool = pool_create_subpool (e->expr_pool);
295 /* Verifies that expression E, whose root node is *N, can be
296 converted to type EXPECTED_TYPE, inserting a conversion at *N
297 if necessary. Returns true if successful, false on failure. */
299 type_check (const union any_node *n, enum val_type expected_type)
301 atom_type actual_type = expr_node_returns (n);
303 switch (expected_type)
306 if (actual_type != OP_number && actual_type != OP_boolean)
308 msg (SE, _("Type mismatch: expression has %s type, "
309 "but a numeric value is required here."),
310 atom_type_name (actual_type));
316 if (actual_type != OP_string)
318 msg (SE, _("Type mismatch: expression has %s type, "
319 "but a string value is required here."),
320 atom_type_name (actual_type));
332 /* Recursive-descent expression parser. */
334 /* Considers whether *NODE may be coerced to type REQUIRED_TYPE.
335 Returns true if possible, false if disallowed.
337 If DO_COERCION is false, then *NODE is not modified and there
340 If DO_COERCION is true, we perform the coercion if possible,
341 modifying *NODE if necessary. If the coercion is not possible
342 then we free *NODE and set *NODE to a null pointer.
344 This function's interface is somewhat awkward. Use one of the
345 wrapper functions type_coercion(), type_coercion_assert(), or
346 is_coercible() instead. */
348 type_coercion_core (struct expression *e,
349 atom_type required_type,
350 union any_node **node,
351 const char *operator_name,
354 atom_type actual_type;
356 assert (!!do_coercion == (e != NULL));
359 /* Propagate error. Whatever caused the original error
360 already emitted an error message. */
364 actual_type = expr_node_returns (*node);
365 if (actual_type == required_type)
371 switch (required_type)
374 if (actual_type == OP_boolean)
376 /* To enforce strict typing rules, insert Boolean to
377 numeric "conversion". This conversion is a no-op,
378 so it will be removed later. */
380 *node = expr_allocate_unary (e, OP_BOOLEAN_TO_NUM, *node);
386 /* No coercion to string. */
390 if (actual_type == OP_number)
392 /* Convert numeric to boolean. */
395 union any_node *op_name;
397 op_name = expr_allocate_string (e, ss_cstr (operator_name));
398 *node = expr_allocate_binary (e, OP_NUM_TO_BOOLEAN, *node,
410 if ((*node)->type == OP_format
411 && fmt_check_input (&(*node)->format.f)
412 && fmt_check_type_compat (&(*node)->format.f, VAL_NUMERIC))
416 (*node)->type = OP_ni_format;
424 if ((*node)->type == OP_format
425 && fmt_check_output (&(*node)->format.f)
426 && fmt_check_type_compat (&(*node)->format.f, VAL_NUMERIC))
430 (*node)->type = OP_no_format;
437 if ((*node)->type == OP_NUM_VAR)
440 *node = (*node)->composite.args[0];
446 if ((*node)->type == OP_STR_VAR)
449 *node = (*node)->composite.args[0];
455 if ((*node)->type == OP_NUM_VAR || (*node)->type == OP_STR_VAR)
458 *node = (*node)->composite.args[0];
464 if ((*node)->type == OP_number
465 && floor ((*node)->number.n) == (*node)->number.n
466 && (*node)->number.n > 0 && (*node)->number.n < INT_MAX)
469 *node = expr_allocate_pos_int (e, (*node)->number.n);
480 msg (SE, _("Type mismatch while applying %s operator: "
481 "cannot convert %s to %s."),
483 atom_type_name (actual_type), atom_type_name (required_type));
489 /* Coerces *NODE to type REQUIRED_TYPE, and returns success. If
490 *NODE cannot be coerced to the desired type then we issue an
491 error message about operator OPERATOR_NAME and free *NODE. */
493 type_coercion (struct expression *e,
494 atom_type required_type, union any_node **node,
495 const char *operator_name)
497 return type_coercion_core (e, required_type, node, operator_name, true);
500 /* Coerces *NODE to type REQUIRED_TYPE.
501 Assert-fails if the coercion is disallowed. */
503 type_coercion_assert (struct expression *e,
504 atom_type required_type, union any_node **node)
506 int success = type_coercion_core (e, required_type, node, NULL, true);
510 /* Returns true if *NODE may be coerced to type REQUIRED_TYPE,
513 is_coercible (atom_type required_type, union any_node *const *node)
515 return type_coercion_core (NULL, required_type,
516 (union any_node **) node, NULL, false);
519 /* Returns true if ACTUAL_TYPE is a kind of REQUIRED_TYPE, false
522 is_compatible (atom_type required_type, atom_type actual_type)
524 return (required_type == actual_type
525 || (required_type == OP_var
526 && (actual_type == OP_num_var || actual_type == OP_str_var)));
529 /* How to parse an operator. */
532 int token; /* Token representing operator. */
533 operation_type type; /* Operation type representing operation. */
534 const char *name; /* Name of operator. */
537 /* Attempts to match the current token against the tokens for the
538 OP_CNT operators in OPS[]. If successful, returns true
539 and, if OPERATOR is non-null, sets *OPERATOR to the operator.
540 On failure, returns false and, if OPERATOR is non-null, sets
541 *OPERATOR to a null pointer. */
543 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops,
544 const struct operator **operator)
546 const struct operator *op;
548 for (op = ops; op < ops + n_ops; op++)
549 if (lex_token (lexer) == op->token)
551 if (op->token != T_NEG_NUM)
553 if (operator != NULL)
557 if (operator != NULL)
563 check_operator (const struct operator *op, int n_args, atom_type arg_type)
565 const struct operation *o;
569 o = &operations[op->type];
570 assert (o->n_args == n_args);
571 assert ((o->flags & OPF_ARRAY_OPERAND) == 0);
572 for (i = 0; i < n_args; i++)
573 assert (is_compatible (arg_type, o->args[i]));
578 check_binary_operators (const struct operator ops[], size_t n_ops,
583 for (i = 0; i < n_ops; i++)
584 check_operator (&ops[i], 2, arg_type);
589 get_operand_type (const struct operator *op)
591 return operations[op->type].args[0];
594 /* Parses a chain of left-associative operator/operand pairs.
595 There are OP_CNT operators, specified in OPS[]. The
596 operators' operands must all be the same type. The next
597 higher level is parsed by PARSE_NEXT_LEVEL. If CHAIN_WARNING
598 is non-null, then it will be issued as a warning if more than
599 one operator/operand pair is parsed. */
600 static union any_node *
601 parse_binary_operators (struct lexer *lexer, struct expression *e, union any_node *node,
602 const struct operator ops[], size_t n_ops,
603 parse_recursively_func *parse_next_level,
604 const char *chain_warning)
606 atom_type operand_type = get_operand_type (&ops[0]);
608 const struct operator *operator;
610 assert (check_binary_operators (ops, n_ops, operand_type));
614 for (op_count = 0; match_operator (lexer, ops, n_ops, &operator); op_count++)
618 /* Convert the left-hand side to type OPERAND_TYPE. */
619 if (!type_coercion (e, operand_type, &node, operator->name))
622 /* Parse the right-hand side and coerce to type
624 rhs = parse_next_level (lexer, e);
625 if (!type_coercion (e, operand_type, &rhs, operator->name))
627 node = expr_allocate_binary (e, operator->type, node, rhs);
630 if (op_count > 1 && chain_warning != NULL)
631 msg (SW, "%s", chain_warning);
636 static union any_node *
637 parse_inverting_unary_operator (struct lexer *lexer, struct expression *e,
638 const struct operator *op,
639 parse_recursively_func *parse_next_level)
641 union any_node *node;
644 check_operator (op, 1, get_operand_type (op));
647 while (match_operator (lexer, op, 1, NULL))
650 node = parse_next_level (lexer, e);
652 && type_coercion (e, get_operand_type (op), &node, op->name)
653 && op_count % 2 != 0)
654 return expr_allocate_unary (e, op->type, node);
659 /* Parses the OR level. */
660 static union any_node *
661 parse_or (struct lexer *lexer, struct expression *e)
663 static const struct operator op =
664 { T_OR, OP_OR, "logical disjunction (`OR')" };
666 return parse_binary_operators (lexer, e, parse_and (lexer, e), &op, 1, parse_and, NULL);
669 /* Parses the AND level. */
670 static union any_node *
671 parse_and (struct lexer *lexer, struct expression *e)
673 static const struct operator op =
674 { T_AND, OP_AND, "logical conjunction (`AND')" };
676 return parse_binary_operators (lexer, e, parse_not (lexer, e),
677 &op, 1, parse_not, NULL);
680 /* Parses the NOT level. */
681 static union any_node *
682 parse_not (struct lexer *lexer, struct expression *e)
684 static const struct operator op
685 = { T_NOT, OP_NOT, "logical negation (`NOT')" };
686 return parse_inverting_unary_operator (lexer, e, &op, parse_rel);
689 /* Parse relational operators. */
690 static union any_node *
691 parse_rel (struct lexer *lexer, struct expression *e)
693 const char *chain_warning =
694 _("Chaining relational operators (e.g. `a < b < c') will "
695 "not produce the mathematically expected result. "
696 "Use the AND logical operator to fix the problem "
697 "(e.g. `a < b AND b < c'). "
698 "If chaining is really intended, parentheses will disable "
699 "this warning (e.g. `(a < b) < c'.)");
701 union any_node *node = parse_add (lexer, e);
706 switch (expr_node_returns (node))
711 static const struct operator ops[] =
713 { T_EQUALS, OP_EQ, "numeric equality (`=')" },
714 { T_EQ, OP_EQ, "numeric equality (`EQ')" },
715 { T_GE, OP_GE, "numeric greater-than-or-equal-to (`>=')" },
716 { T_GT, OP_GT, "numeric greater than (`>')" },
717 { T_LE, OP_LE, "numeric less-than-or-equal-to (`<=')" },
718 { T_LT, OP_LT, "numeric less than (`<')" },
719 { T_NE, OP_NE, "numeric inequality (`<>')" },
722 return parse_binary_operators (lexer, e, node, ops,
723 sizeof ops / sizeof *ops,
724 parse_add, chain_warning);
729 static const struct operator ops[] =
731 { T_EQUALS, OP_EQ_STRING, "string equality (`=')" },
732 { T_EQ, OP_EQ_STRING, "string equality (`EQ')" },
733 { T_GE, OP_GE_STRING, "string greater-than-or-equal-to (`>=')" },
734 { T_GT, OP_GT_STRING, "string greater than (`>')" },
735 { T_LE, OP_LE_STRING, "string less-than-or-equal-to (`<=')" },
736 { T_LT, OP_LT_STRING, "string less than (`<')" },
737 { T_NE, OP_NE_STRING, "string inequality (`<>')" },
740 return parse_binary_operators (lexer, e, node, ops,
741 sizeof ops / sizeof *ops,
742 parse_add, chain_warning);
750 /* Parses the addition and subtraction level. */
751 static union any_node *
752 parse_add (struct lexer *lexer, struct expression *e)
754 static const struct operator ops[] =
756 { T_PLUS, OP_ADD, "addition (`+')" },
757 { T_DASH, OP_SUB, "subtraction (`-')" },
758 { T_NEG_NUM, OP_ADD, "subtraction (`-')" },
761 return parse_binary_operators (lexer, e, parse_mul (lexer, e),
762 ops, sizeof ops / sizeof *ops,
766 /* Parses the multiplication and division level. */
767 static union any_node *
768 parse_mul (struct lexer *lexer, struct expression *e)
770 static const struct operator ops[] =
772 { T_ASTERISK, OP_MUL, "multiplication (`*')" },
773 { T_SLASH, OP_DIV, "division (`/')" },
776 return parse_binary_operators (lexer, e, parse_neg (lexer, e),
777 ops, sizeof ops / sizeof *ops,
781 /* Parses the unary minus level. */
782 static union any_node *
783 parse_neg (struct lexer *lexer, struct expression *e)
785 static const struct operator op = { T_DASH, OP_NEG, "negation (`-')" };
786 return parse_inverting_unary_operator (lexer, e, &op, parse_exp);
789 static union any_node *
790 parse_exp (struct lexer *lexer, struct expression *e)
792 static const struct operator op =
793 { T_EXP, OP_POW, "exponentiation (`**')" };
795 const char *chain_warning =
796 _("The exponentiation operator (`**') is left-associative, "
797 "even though right-associative semantics are more useful. "
798 "That is, `a**b**c' equals `(a**b)**c', not as `a**(b**c)'. "
799 "To disable this warning, insert parentheses.");
801 union any_node *lhs, *node;
802 bool negative = false;
804 if (lex_token (lexer) == T_NEG_NUM)
806 lhs = expr_allocate_number (e, -lex_tokval (lexer));
811 lhs = parse_primary (lexer, e);
813 node = parse_binary_operators (lexer, e, lhs, &op, 1,
814 parse_primary, chain_warning);
815 return negative ? expr_allocate_unary (e, OP_NEG, node) : node;
818 /* Parses system variables. */
819 static union any_node *
820 parse_sysvar (struct lexer *lexer, struct expression *e)
822 if (lex_match_id (lexer, "$CASENUM"))
823 return expr_allocate_nullary (e, OP_CASENUM);
824 else if (lex_match_id (lexer, "$DATE"))
826 static const char *months[12] =
828 "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
829 "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
832 time_t last_proc_time = time_of_last_procedure (e->ds);
837 time = localtime (&last_proc_time);
838 sprintf (temp_buf, "%02d %s %02d", abs (time->tm_mday) % 100,
839 months[abs (time->tm_mon) % 12], abs (time->tm_year) % 100);
841 ss_alloc_substring (&s, ss_cstr (temp_buf));
842 return expr_allocate_string (e, s);
844 else if (lex_match_id (lexer, "$TRUE"))
845 return expr_allocate_boolean (e, 1.0);
846 else if (lex_match_id (lexer, "$FALSE"))
847 return expr_allocate_boolean (e, 0.0);
848 else if (lex_match_id (lexer, "$SYSMIS"))
849 return expr_allocate_number (e, SYSMIS);
850 else if (lex_match_id (lexer, "$JDATE"))
852 time_t time = time_of_last_procedure (e->ds);
853 struct tm *tm = localtime (&time);
854 return expr_allocate_number (e, expr_ymd_to_ofs (tm->tm_year + 1900,
858 else if (lex_match_id (lexer, "$TIME"))
860 time_t time = time_of_last_procedure (e->ds);
861 struct tm *tm = localtime (&time);
862 return expr_allocate_number (e,
863 expr_ymd_to_date (tm->tm_year + 1900,
866 + tm->tm_hour * 60 * 60.
870 else if (lex_match_id (lexer, "$LENGTH"))
871 return expr_allocate_number (e, settings_get_viewlength ());
872 else if (lex_match_id (lexer, "$WIDTH"))
873 return expr_allocate_number (e, settings_get_viewwidth ());
876 msg (SE, _("Unknown system variable %s."), lex_tokcstr (lexer));
881 /* Parses numbers, varnames, etc. */
882 static union any_node *
883 parse_primary (struct lexer *lexer, struct expression *e)
885 switch (lex_token (lexer))
888 if (lex_next_token (lexer, 1) == T_LPAREN)
890 /* An identifier followed by a left parenthesis may be
891 a vector element reference. If not, it's a function
893 if (e->ds != NULL && dict_lookup_vector (dataset_dict (e->ds), lex_tokcstr (lexer)) != NULL)
894 return parse_vector_element (lexer, e);
896 return parse_function (lexer, e);
898 else if (lex_tokcstr (lexer)[0] == '$')
900 /* $ at the beginning indicates a system variable. */
901 return parse_sysvar (lexer, e);
903 else if (e->ds != NULL && dict_lookup_var (dataset_dict (e->ds), lex_tokcstr (lexer)))
905 /* It looks like a user variable.
906 (It could be a format specifier, but we'll assume
907 it's a variable unless proven otherwise. */
908 return allocate_unary_variable (e, parse_variable (lexer, dataset_dict (e->ds)));
912 /* Try to parse it as a format specifier. */
917 ok = parse_format_specifier (lexer, &fmt);
921 return expr_allocate_format (e, &fmt);
923 /* All attempts failed. */
924 msg (SE, _("Unknown identifier %s."), lex_tokcstr (lexer));
932 union any_node *node = expr_allocate_number (e, lex_tokval (lexer));
939 const char *dict_encoding;
940 union any_node *node;
943 dict_encoding = (e->ds != NULL
944 ? dict_get_encoding (dataset_dict (e->ds))
946 s = recode_string_pool (dict_encoding, "UTF-8", lex_tokcstr (lexer),
947 ss_length (lex_tokss (lexer)), e->expr_pool);
948 node = expr_allocate_string (e, ss_cstr (s));
956 /* Count number of left parentheses so that we can match them against
957 an equal number of right parentheses. This defeats trivial attempts
958 to exhaust the stack with a lot of left parentheses. (More
959 sophisticated attacks will still succeed.) */
961 while (lex_match (lexer, T_LPAREN))
964 union any_node *node = parse_or (lexer, e);
968 for (size_t i = 0; i < n; i++)
969 if (!lex_force_match (lexer, T_RPAREN))
976 lex_error (lexer, NULL);
981 static union any_node *
982 parse_vector_element (struct lexer *lexer, struct expression *e)
984 const struct vector *vector;
985 union any_node *element;
987 /* Find vector, skip token.
988 The caller must already have verified that the current token
989 is the name of a vector. */
990 vector = dict_lookup_vector (dataset_dict (e->ds), lex_tokcstr (lexer));
991 assert (vector != NULL);
994 /* Skip left parenthesis token.
995 The caller must have verified that the lookahead is a left
997 assert (lex_token (lexer) == T_LPAREN);
1000 element = parse_or (lexer, e);
1001 if (!type_coercion (e, OP_number, &element, "vector indexing")
1002 || !lex_match (lexer, T_RPAREN))
1005 return expr_allocate_binary (e, (vector_get_type (vector) == VAL_NUMERIC
1006 ? OP_VEC_ELEM_NUM : OP_VEC_ELEM_STR),
1007 element, expr_allocate_vector (e, vector));
1010 /* Individual function parsing. */
1012 const struct operation operations[OP_first + n_OP] = {
1013 #include "parse.inc"
1017 word_matches (const char **test, const char **name)
1019 size_t test_len = strcspn (*test, ".");
1020 size_t name_len = strcspn (*name, ".");
1021 if (test_len == name_len)
1023 if (buf_compare_case (*test, *name, test_len))
1026 else if (test_len < 3 || test_len > name_len)
1030 if (buf_compare_case (*test, *name, test_len))
1036 if (**test != **name)
1047 /* Returns 0 if TOKEN and FUNC do not match,
1048 1 if TOKEN is an acceptable abbreviation for FUNC,
1049 2 if TOKEN equals FUNC. */
1051 compare_function_names (const char *token_, const char *func_)
1053 const char *token = token_;
1054 const char *func = func_;
1055 while (*token || *func)
1056 if (!word_matches (&token, &func))
1058 return !c_strcasecmp (token_, func_) ? 2 : 1;
1062 lookup_function (const char *token,
1063 const struct operation **first,
1064 const struct operation **last)
1066 *first = *last = NULL;
1067 const struct operation *best = NULL;
1069 for (const struct operation *f = operations + OP_function_first;
1070 f <= operations + OP_function_last; f++)
1072 int score = compare_function_names (token, f->name);
1078 else if (score == 1 && !(f->flags & OPF_NO_ABBREV) && !best)
1087 const struct operation *f = best;
1088 while (f <= operations + OP_function_last
1089 && !c_strcasecmp (f->name, best->name))
1097 extract_min_valid (const char *s)
1099 char *p = strrchr (s, '.');
1101 || p[1] < '0' || p[1] > '9'
1102 || strspn (p + 1, "0123456789") != strlen (p + 1))
1105 return atoi (p + 1);
1109 function_arg_type (const struct operation *f, size_t arg_idx)
1111 assert (arg_idx < f->n_args || (f->flags & OPF_ARRAY_OPERAND));
1113 return f->args[arg_idx < f->n_args ? arg_idx : f->n_args - 1];
1117 match_function (union any_node **args, int n_args, const struct operation *f)
1121 if (n_args < f->n_args
1122 || (n_args > f->n_args && (f->flags & OPF_ARRAY_OPERAND) == 0)
1123 || n_args - (f->n_args - 1) < f->array_min_elems)
1126 for (i = 0; i < n_args; i++)
1127 if (!is_coercible (function_arg_type (f, i), &args[i]))
1134 coerce_function_args (struct expression *e, const struct operation *f,
1135 union any_node **args, size_t n_args)
1139 for (i = 0; i < n_args; i++)
1140 type_coercion_assert (e, function_arg_type (f, i), &args[i]);
1144 validate_function_args (const struct operation *f, int n_args, int min_valid)
1146 /* Count the function arguments that go into the trailing array (if any). We
1147 know that there must be at least the minimum number because
1148 match_function() already checked. */
1149 int array_n_args = n_args - (f->n_args - 1);
1150 assert (array_n_args >= f->array_min_elems);
1152 if ((f->flags & OPF_ARRAY_OPERAND)
1153 && array_n_args % f->array_granularity != 0)
1155 /* RANGE is the only case we have so far. It has paired arguments with
1156 one initial argument, and that's the only special case we deal with
1158 assert (f->array_granularity == 2);
1159 assert (n_args % 2 == 0);
1160 msg (SE, _("%s must have an odd number of arguments."), f->prototype);
1164 if (min_valid != -1)
1166 if (f->array_min_elems == 0)
1168 assert ((f->flags & OPF_MIN_VALID) == 0);
1169 msg (SE, _("%s function cannot accept suffix .%d to specify the "
1170 "minimum number of valid arguments."),
1171 f->prototype, min_valid);
1176 assert (f->flags & OPF_MIN_VALID);
1177 if (min_valid > array_n_args)
1179 msg (SE, _("For %s with %d arguments, at most %d (not %d) may be "
1180 "required to be valid."),
1181 f->prototype, n_args, array_n_args, min_valid);
1191 add_arg (union any_node ***args, int *n_args, int *allocated_args,
1192 union any_node *arg)
1194 if (*n_args >= *allocated_args)
1196 *allocated_args += 8;
1197 *args = xrealloc (*args, sizeof **args * *allocated_args);
1200 (*args)[(*n_args)++] = arg;
1204 put_invocation (struct string *s,
1205 const char *func_name, union any_node **args, size_t n_args)
1209 ds_put_format (s, "%s(", func_name);
1210 for (i = 0; i < n_args; i++)
1213 ds_put_cstr (s, ", ");
1214 ds_put_cstr (s, operations[expr_node_returns (args[i])].prototype);
1216 ds_put_byte (s, ')');
1220 no_match (const char *func_name,
1221 union any_node **args, size_t n_args,
1222 const struct operation *first, const struct operation *last)
1225 const struct operation *f;
1229 if (last - first == 1)
1231 ds_put_format (&s, _("Type mismatch invoking %s as "), first->prototype);
1232 put_invocation (&s, func_name, args, n_args);
1236 ds_put_cstr (&s, _("Function invocation "));
1237 put_invocation (&s, func_name, args, n_args);
1238 ds_put_cstr (&s, _(" does not match any known function. Candidates are:"));
1240 for (f = first; f < last; f++)
1241 ds_put_format (&s, "\n%s", f->prototype);
1243 ds_put_byte (&s, '.');
1245 msg (SE, "%s", ds_cstr (&s));
1250 static union any_node *
1251 parse_function (struct lexer *lexer, struct expression *e)
1254 const struct operation *f, *first, *last;
1256 union any_node **args = NULL;
1258 int allocated_args = 0;
1260 struct string func_name;
1264 ds_init_substring (&func_name, lex_tokss (lexer));
1265 min_valid = extract_min_valid (lex_tokcstr (lexer));
1266 if (!lookup_function (lex_tokcstr (lexer), &first, &last))
1268 msg (SE, _("No function or vector named %s."), lex_tokcstr (lexer));
1269 ds_destroy (&func_name);
1274 if (!lex_force_match (lexer, T_LPAREN))
1276 ds_destroy (&func_name);
1281 n_args = allocated_args = 0;
1282 if (lex_token (lexer) != T_RPAREN)
1285 if (lex_token (lexer) == T_ID
1286 && lex_next_token (lexer, 1) == T_TO)
1288 const struct variable **vars;
1292 if (!parse_variables_const (lexer, dataset_dict (e->ds), &vars, &n_vars, PV_SINGLE))
1294 for (i = 0; i < n_vars; i++)
1295 add_arg (&args, &n_args, &allocated_args,
1296 allocate_unary_variable (e, vars[i]));
1301 union any_node *arg = parse_or (lexer, e);
1305 add_arg (&args, &n_args, &allocated_args, arg);
1307 if (lex_match (lexer, T_RPAREN))
1309 else if (!lex_match (lexer, T_COMMA))
1311 lex_error_expecting (lexer, "`,'", "`)'");
1316 for (f = first; f < last; f++)
1317 if (match_function (args, n_args, f))
1321 no_match (ds_cstr (&func_name), args, n_args, first, last);
1325 coerce_function_args (e, f, args, n_args);
1326 if (!validate_function_args (f, n_args, min_valid))
1329 if ((f->flags & OPF_EXTENSION) && settings_get_syntax () == COMPATIBLE)
1330 msg (SW, _("%s is a PSPP extension."), f->prototype);
1331 if (f->flags & OPF_UNIMPLEMENTED)
1333 msg (SE, _("%s is not available in this version of PSPP."),
1337 if ((f->flags & OPF_PERM_ONLY) &&
1338 proc_in_temporary_transformations (e->ds))
1340 msg (SE, _("%s may not appear after %s."), f->prototype, "TEMPORARY");
1344 n = expr_allocate_composite (e, f - operations, args, n_args);
1345 n->composite.min_valid = min_valid != -1 ? min_valid : f->array_min_elems;
1347 if (n->type == OP_LAG_Vn || n->type == OP_LAG_Vs)
1348 dataset_need_lag (e->ds, 1);
1349 else if (n->type == OP_LAG_Vnn || n->type == OP_LAG_Vsn)
1352 assert (n->composite.n_args == 2);
1353 assert (n->composite.args[1]->type == OP_pos_int);
1354 n_before = n->composite.args[1]->integer.i;
1355 dataset_need_lag (e->ds, n_before);
1359 ds_destroy (&func_name);
1364 ds_destroy (&func_name);
1368 /* Utility functions. */
1370 static struct expression *
1371 expr_create (struct dataset *ds)
1373 struct pool *pool = pool_create ();
1374 struct expression *e = pool_alloc (pool, sizeof *e);
1375 e->expr_pool = pool;
1377 e->eval_pool = pool_create_subpool (e->expr_pool);
1380 e->n_ops = e->allocated_ops = 0;
1385 expr_node_returns (const union any_node *n)
1388 assert (is_operation (n->type));
1389 if (is_atom (n->type))
1391 else if (is_composite (n->type))
1392 return operations[n->type].returns;
1398 atom_type_name (atom_type type)
1400 assert (is_atom (type));
1401 return operations[type].name;
1405 expr_allocate_nullary (struct expression *e, operation_type op)
1407 return expr_allocate_composite (e, op, NULL, 0);
1411 expr_allocate_unary (struct expression *e, operation_type op,
1412 union any_node *arg0)
1414 return expr_allocate_composite (e, op, &arg0, 1);
1418 expr_allocate_binary (struct expression *e, operation_type op,
1419 union any_node *arg0, union any_node *arg1)
1421 union any_node *args[2];
1424 return expr_allocate_composite (e, op, args, 2);
1428 is_valid_node (union any_node *n)
1430 const struct operation *op;
1434 assert (is_operation (n->type));
1435 op = &operations[n->type];
1437 if (!is_atom (n->type))
1439 struct composite_node *c = &n->composite;
1441 assert (is_composite (n->type));
1442 assert (c->n_args >= op->n_args);
1443 for (i = 0; i < op->n_args; i++)
1444 assert (is_compatible (op->args[i], expr_node_returns (c->args[i])));
1445 if (c->n_args > op->n_args && !is_operator (n->type))
1447 assert (op->flags & OPF_ARRAY_OPERAND);
1448 for (i = 0; i < c->n_args; i++)
1449 assert (is_compatible (op->args[op->n_args - 1],
1450 expr_node_returns (c->args[i])));
1458 expr_allocate_composite (struct expression *e, operation_type op,
1459 union any_node **args, size_t n_args)
1464 n = pool_alloc (e->expr_pool, sizeof n->composite);
1466 n->composite.n_args = n_args;
1467 n->composite.args = pool_alloc (e->expr_pool,
1468 sizeof *n->composite.args * n_args);
1469 for (i = 0; i < n_args; i++)
1471 if (args[i] == NULL)
1473 n->composite.args[i] = args[i];
1475 memcpy (n->composite.args, args, sizeof *n->composite.args * n_args);
1476 n->composite.min_valid = 0;
1477 assert (is_valid_node (n));
1482 expr_allocate_number (struct expression *e, double d)
1484 union any_node *n = pool_alloc (e->expr_pool, sizeof n->number);
1485 n->type = OP_number;
1491 expr_allocate_boolean (struct expression *e, double b)
1493 union any_node *n = pool_alloc (e->expr_pool, sizeof n->number);
1494 assert (b == 0.0 || b == 1.0 || b == SYSMIS);
1495 n->type = OP_boolean;
1501 expr_allocate_integer (struct expression *e, int i)
1503 union any_node *n = pool_alloc (e->expr_pool, sizeof n->integer);
1504 n->type = OP_integer;
1510 expr_allocate_pos_int (struct expression *e, int i)
1512 union any_node *n = pool_alloc (e->expr_pool, sizeof n->integer);
1514 n->type = OP_pos_int;
1520 expr_allocate_vector (struct expression *e, const struct vector *vector)
1522 union any_node *n = pool_alloc (e->expr_pool, sizeof n->vector);
1523 n->type = OP_vector;
1524 n->vector.v = vector;
1529 expr_allocate_string (struct expression *e, struct substring s)
1531 union any_node *n = pool_alloc (e->expr_pool, sizeof n->string);
1532 n->type = OP_string;
1538 expr_allocate_variable (struct expression *e, const struct variable *v)
1540 union any_node *n = pool_alloc (e->expr_pool, sizeof n->variable);
1541 n->type = var_is_numeric (v) ? OP_num_var : OP_str_var;
1547 expr_allocate_format (struct expression *e, const struct fmt_spec *format)
1549 union any_node *n = pool_alloc (e->expr_pool, sizeof n->format);
1550 n->type = OP_format;
1551 n->format.f = *format;
1555 /* Allocates a unary composite node that represents the value of
1556 variable V in expression E. */
1557 static union any_node *
1558 allocate_unary_variable (struct expression *e, const struct variable *v)
1561 return expr_allocate_unary (e, var_is_numeric (v) ? OP_NUM_VAR : OP_STR_VAR,
1562 expr_allocate_variable (e, v));
1565 /* Export function details to other modules. */
1567 /* Returns the operation structure for the function with the
1569 const struct operation *
1570 expr_get_function (size_t idx)
1572 assert (idx < n_OP_function);
1573 return &operations[OP_function_first + idx];
1576 /* Returns the number of expression functions. */
1578 expr_get_n_functions (void)
1580 return n_OP_function;
1583 /* Returns the name of operation OP. */
1585 expr_operation_get_name (const struct operation *op)
1590 /* Returns the human-readable prototype for operation OP. */
1592 expr_operation_get_prototype (const struct operation *op)
1594 return op->prototype;
1597 /* Returns the number of arguments for operation OP. */
1599 expr_operation_get_n_args (const struct operation *op)