pintos-os.org Git - pspp/blob - src/language/expressions/parse.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 1997-9, 2000, 2006, 2010, 2011, 2012, 2014 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include "private.h"
  20
  21 #include <ctype.h>
  22 #include <float.h>
  23 #include <limits.h>
  24 #include <stdlib.h>
  25
  26 #include "data/case.h"
  27 #include "data/dictionary.h"
  28 #include "data/settings.h"
  29 #include "data/variable.h"
  30 #include "language/expressions/helpers.h"
  31 #include "language/lexer/format-parser.h"
  32 #include "language/lexer/lexer.h"
  33 #include "language/lexer/variable-parser.h"
  34 #include "libpspp/array.h"
  35 #include "libpspp/assertion.h"
  36 #include "libpspp/i18n.h"
  37 #include "libpspp/message.h"
  38 #include "libpspp/misc.h"
  39 #include "libpspp/pool.h"
  40 #include "libpspp/str.h"
  41
  42 #include "gl/c-strcase.h"
  43 #include "gl/xalloc.h"
  44 \f
  45 /* Declarations. */
  46
  47 /* Recursive descent parser in order of increasing precedence. */
  48 typedef union any_node *parse_recursively_func (struct lexer *, struct expression *);
  49 static parse_recursively_func parse_or, parse_and, parse_not;
  50 static parse_recursively_func parse_rel, parse_add, parse_mul;
  51 static parse_recursively_func parse_neg, parse_exp;
  52 static parse_recursively_func parse_primary;
  53 static parse_recursively_func parse_vector_element, parse_function;
  54
  55 /* Utility functions. */
  56 static struct expression *expr_create (struct dataset *ds);
  57 atom_type expr_node_returns (const union any_node *);
  58
  59 static const char *atom_type_name (atom_type);
  60 static struct expression *finish_expression (union any_node *,
  61                                              struct expression *);
  62 static bool type_check (const union any_node *, enum val_type expected_type);
  63 static union any_node *allocate_unary_variable (struct expression *,
  64                                                 const struct variable *);
  65 \f
  66 /* Public functions. */
  67
  68 /* Parses an expression of the given TYPE.  If DS is nonnull then variables and
  69    vectors within it may be referenced within the expression; otherwise, the
  70    expression must not reference any variables or vectors.  Returns the new
  71    expression if successful or a null pointer otherwise.  If POOL is nonnull,
  72    then destroying POOL will free the expression; otherwise, the caller must
  73    eventually free it with expr_free(). */
  74 struct expression *
  75 expr_parse (struct lexer *lexer, struct pool *pool, struct dataset *ds,
  76             enum val_type type)
  77 {
  78   assert (val_type_is_valid (type));
  79
  80   struct expression *e = expr_create (ds);
  81   union any_node *n = parse_or (lexer, e);
  82   if (!n || !type_check (n, type))
  83     {
  84       expr_free (e);
  85       return NULL;
  86     }
  87
  88   e = finish_expression (expr_optimize (n, e), e);
  89   if (pool)
  90     pool_add_subpool (pool, e->expr_pool);
  91   return e;
  92 }
  93
  94 /* Parses a boolean expression, otherwise similar to expr_parse(). */
  95 struct expression *
  96 expr_parse_bool (struct lexer *lexer, struct pool *pool, struct dataset *ds)
  97 {
  98   struct expression *e = expr_create (ds);
  99   union any_node *n = parse_or (lexer, e);
 100   if (!n)
 101     {
 102       expr_free (e);
 103       return NULL;
 104     }
 105
 106   atom_type actual_type = expr_node_returns (n);
 107   if (actual_type == OP_number)
 108     n = expr_allocate_binary (e, OP_NUM_TO_BOOLEAN, n,
 109                               expr_allocate_string (e, ss_empty ()));
 110   else if (actual_type != OP_boolean)
 111     {
 112       msg (SE, _("Type mismatch: expression has %s type, "
 113                  "but a boolean value is required here."),
 114            atom_type_name (actual_type));
 115       expr_free (e);
 116       return NULL;
 117     }
 118
 119   e = finish_expression (expr_optimize (n, e), e);
 120   if (pool)
 121     pool_add_subpool (pool, e->expr_pool);
 122   return e;
 123 }
 124
 125 /* Parses a numeric expression that is intended to be assigned to newly created
 126    variable NEW_VAR_NAME.  (This allows for a better error message if the
 127    expression is not numeric.)  Otherwise similar to expr_parse(). */
 128 struct expression *
 129 expr_parse_new_variable (struct lexer *lexer, struct pool *pool, struct dataset *ds,
 130                          const char *new_var_name)
 131 {
 132   struct expression *e = expr_create (ds);
 133   union any_node *n = parse_or (lexer, e);
 134   if (!n)
 135     {
 136       expr_free (e);
 137       return NULL;
 138     }
 139
 140   atom_type actual_type = expr_node_returns (n);
 141   if (actual_type != OP_number && actual_type != OP_boolean)
 142     {
 143       msg (SE, _("This command tries to create a new variable %s by assigning a "
 144                  "string value to it, but this is not supported.  Use "
 145                  "the STRING command to create the new variable with the "
 146                  "correct width before assigning to it, e.g. STRING %s(A20)."),
 147            new_var_name, new_var_name);
 148       expr_free (e);
 149       return NULL;
 150     }
 151
 152   e = finish_expression (expr_optimize (n, e), e);
 153   if (pool)
 154     pool_add_subpool (pool, e->expr_pool);
 155   return e;
 156 }
 157
 158 /* Free expression E. */
 159 void
 160 expr_free (struct expression *e)
 161 {
 162   if (e != NULL)
 163     pool_destroy (e->expr_pool);
 164 }
 165
 166 struct expression *
 167 expr_parse_any (struct lexer *lexer, struct dataset *ds, bool optimize)
 168 {
 169   union any_node *n;
 170   struct expression *e;
 171
 172   e = expr_create (ds);
 173   n = parse_or (lexer, e);
 174   if (n == NULL)
 175     {
 176       expr_free (e);
 177       return NULL;
 178     }
 179
 180   if (optimize)
 181     n = expr_optimize (n, e);
 182   return finish_expression (n, e);
 183 }
 184 \f
 185 /* Finishing up expression building. */
 186
 187 /* Height of an expression's stacks. */
 188 struct stack_heights
 189   {
 190     int number_height;  /* Height of number stack. */
 191     int string_height;  /* Height of string stack. */
 192   };
 193
 194 /* Stack heights used by different kinds of arguments. */
 195 static const struct stack_heights on_number_stack = {1, 0};
 196 static const struct stack_heights on_string_stack = {0, 1};
 197 static const struct stack_heights not_on_stack = {0, 0};
 198
 199 /* Returns the stack heights used by an atom of the given
 200    TYPE. */
 201 static const struct stack_heights *
 202 atom_type_stack (atom_type type)
 203 {
 204   assert (is_atom (type));
 205
 206   switch (type)
 207     {
 208     case OP_number:
 209     case OP_boolean:
 210       return &on_number_stack;
 211
 212     case OP_string:
 213       return &on_string_stack;
 214
 215     case OP_format:
 216     case OP_ni_format:
 217     case OP_no_format:
 218     case OP_num_var:
 219     case OP_str_var:
 220     case OP_integer:
 221     case OP_pos_int:
 222     case OP_vector:
 223       return &not_on_stack;
 224
 225     default:
 226       NOT_REACHED ();
 227     }
 228 }
 229
 230 /* Measures the stack height needed for node N, supposing that
 231    the stack height is initially *HEIGHT and updating *HEIGHT to
 232    the final stack height.  Updates *MAX, if necessary, to
 233    reflect the maximum intermediate or final height. */
 234 static void
 235 measure_stack (const union any_node *n,
 236                struct stack_heights *height, struct stack_heights *max)
 237 {
 238   const struct stack_heights *return_height;
 239
 240   if (is_composite (n->type))
 241     {
 242       struct stack_heights args;
 243       int i;
 244
 245       args = *height;
 246       for (i = 0; i < n->composite.n_args; i++)
 247         measure_stack (n->composite.args[i], &args, max);
 248
 249       return_height = atom_type_stack (operations[n->type].returns);
 250     }
 251   else
 252     return_height = atom_type_stack (n->type);
 253
 254   height->number_height += return_height->number_height;
 255   height->string_height += return_height->string_height;
 256
 257   if (height->number_height > max->number_height)
 258     max->number_height = height->number_height;
 259   if (height->string_height > max->string_height)
 260     max->string_height = height->string_height;
 261 }
 262
 263 /* Allocates stacks within E sufficient for evaluating node N. */
 264 static void
 265 allocate_stacks (union any_node *n, struct expression *e)
 266 {
 267   struct stack_heights initial = {0, 0};
 268   struct stack_heights max = {0, 0};
 269
 270   measure_stack (n, &initial, &max);
 271   e->number_stack = pool_alloc (e->expr_pool,
 272                                 sizeof *e->number_stack * max.number_height);
 273   e->string_stack = pool_alloc (e->expr_pool,
 274                                 sizeof *e->string_stack * max.string_height);
 275 }
 276
 277 /* Finalizes expression E for evaluating node N. */
 278 static struct expression *
 279 finish_expression (union any_node *n, struct expression *e)
 280 {
 281   /* Allocate stacks. */
 282   allocate_stacks (n, e);
 283
 284   /* Output postfix representation. */
 285   expr_flatten (n, e);
 286
 287   /* The eval_pool might have been used for allocating strings
 288      during optimization.  We need to keep those strings around
 289      for all subsequent evaluations, so start a new eval_pool. */
 290   e->eval_pool = pool_create_subpool (e->expr_pool);
 291
 292   return e;
 293 }
 294
 295 /* Verifies that expression E, whose root node is *N, can be
 296    converted to type EXPECTED_TYPE, inserting a conversion at *N
 297    if necessary.  Returns true if successful, false on failure. */
 298 static bool
 299 type_check (const union any_node *n, enum val_type expected_type)
 300 {
 301   atom_type actual_type = expr_node_returns (n);
 302
 303   switch (expected_type)
 304     {
 305     case VAL_NUMERIC:
 306       if (actual_type != OP_number && actual_type != OP_boolean)
 307         {
 308           msg (SE, _("Type mismatch: expression has %s type, "
 309                      "but a numeric value is required here."),
 310                atom_type_name (actual_type));
 311           return false;
 312         }
 313       break;
 314
 315     case VAL_STRING:
 316       if (actual_type != OP_string)
 317         {
 318           msg (SE, _("Type mismatch: expression has %s type, "
 319                      "but a string value is required here."),
 320                atom_type_name (actual_type));
 321           return false;
 322         }
 323       break;
 324
 325     default:
 326       NOT_REACHED ();
 327     }
 328
 329   return true;
 330 }
 331 \f
 332 /* Recursive-descent expression parser. */
 333
 334 /* Considers whether *NODE may be coerced to type REQUIRED_TYPE.
 335    Returns true if possible, false if disallowed.
 336
 337    If DO_COERCION is false, then *NODE is not modified and there
 338    are no side effects.
 339
 340    If DO_COERCION is true, we perform the coercion if possible,
 341    modifying *NODE if necessary.  If the coercion is not possible
 342    then we free *NODE and set *NODE to a null pointer.
 343
 344    This function's interface is somewhat awkward.  Use one of the
 345    wrapper functions type_coercion(), type_coercion_assert(), or
 346    is_coercible() instead. */
 347 static bool
 348 type_coercion_core (struct expression *e,
 349                     atom_type required_type,
 350                     union any_node **node,
 351                     const char *operator_name,
 352                     bool do_coercion)
 353 {
 354   atom_type actual_type;
 355
 356   assert (!!do_coercion == (e != NULL));
 357   if (*node == NULL)
 358     {
 359       /* Propagate error.  Whatever caused the original error
 360          already emitted an error message. */
 361       return false;
 362     }
 363
 364   actual_type = expr_node_returns (*node);
 365   if (actual_type == required_type)
 366     {
 367       /* Type match. */
 368       return true;
 369     }
 370
 371   switch (required_type)
 372     {
 373     case OP_number:
 374       if (actual_type == OP_boolean)
 375         {
 376           /* To enforce strict typing rules, insert Boolean to
 377              numeric "conversion".  This conversion is a no-op,
 378              so it will be removed later. */
 379           if (do_coercion)
 380             *node = expr_allocate_unary (e, OP_BOOLEAN_TO_NUM, *node);
 381           return true;
 382         }
 383       break;
 384
 385     case OP_string:
 386       /* No coercion to string. */
 387       break;
 388
 389     case OP_boolean:
 390       if (actual_type == OP_number)
 391         {
 392           /* Convert numeric to boolean. */
 393           if (do_coercion)
 394             {
 395               union any_node *op_name;
 396
 397               op_name = expr_allocate_string (e, ss_cstr (operator_name));
 398               *node = expr_allocate_binary (e, OP_NUM_TO_BOOLEAN, *node,
 399                                             op_name);
 400             }
 401           return true;
 402         }
 403       break;
 404
 405     case OP_format:
 406       NOT_REACHED ();
 407
 408     case OP_ni_format:
 409       msg_disable ();
 410       if ((*node)->type == OP_format
 411           && fmt_check_input (&(*node)->format.f)
 412           && fmt_check_type_compat (&(*node)->format.f, VAL_NUMERIC))
 413         {
 414           msg_enable ();
 415           if (do_coercion)
 416             (*node)->type = OP_ni_format;
 417           return true;
 418         }
 419       msg_enable ();
 420       break;
 421
 422     case OP_no_format:
 423       msg_disable ();
 424       if ((*node)->type == OP_format
 425           && fmt_check_output (&(*node)->format.f)
 426           && fmt_check_type_compat (&(*node)->format.f, VAL_NUMERIC))
 427         {
 428           msg_enable ();
 429           if (do_coercion)
 430             (*node)->type = OP_no_format;
 431           return true;
 432         }
 433       msg_enable ();
 434       break;
 435
 436     case OP_num_var:
 437       if ((*node)->type == OP_NUM_VAR)
 438         {
 439           if (do_coercion)
 440             *node = (*node)->composite.args[0];
 441           return true;
 442         }
 443       break;
 444
 445     case OP_str_var:
 446       if ((*node)->type == OP_STR_VAR)
 447         {
 448           if (do_coercion)
 449             *node = (*node)->composite.args[0];
 450           return true;
 451         }
 452       break;
 453
 454     case OP_var:
 455       if ((*node)->type == OP_NUM_VAR || (*node)->type == OP_STR_VAR)
 456         {
 457           if (do_coercion)
 458             *node = (*node)->composite.args[0];
 459           return true;
 460         }
 461       break;
 462
 463     case OP_pos_int:
 464       if ((*node)->type == OP_number
 465           && floor ((*node)->number.n) == (*node)->number.n
 466           && (*node)->number.n > 0 && (*node)->number.n < INT_MAX)
 467         {
 468           if (do_coercion)
 469             *node = expr_allocate_pos_int (e, (*node)->number.n);
 470           return true;
 471         }
 472       break;
 473
 474     default:
 475       NOT_REACHED ();
 476     }
 477
 478   if (do_coercion)
 479     {
 480       msg (SE, _("Type mismatch while applying %s operator: "
 481                  "cannot convert %s to %s."),
 482            operator_name,
 483            atom_type_name (actual_type), atom_type_name (required_type));
 484       *node = NULL;
 485     }
 486   return false;
 487 }
 488
 489 /* Coerces *NODE to type REQUIRED_TYPE, and returns success.  If
 490    *NODE cannot be coerced to the desired type then we issue an
 491    error message about operator OPERATOR_NAME and free *NODE. */
 492 static bool
 493 type_coercion (struct expression *e,
 494                atom_type required_type, union any_node **node,
 495                const char *operator_name)
 496 {
 497   return type_coercion_core (e, required_type, node, operator_name, true);
 498 }
 499
 500 /* Coerces *NODE to type REQUIRED_TYPE.
 501    Assert-fails if the coercion is disallowed. */
 502 static void
 503 type_coercion_assert (struct expression *e,
 504                       atom_type required_type, union any_node **node)
 505 {
 506   int success = type_coercion_core (e, required_type, node, NULL, true);
 507   assert (success);
 508 }
 509
 510 /* Returns true if *NODE may be coerced to type REQUIRED_TYPE,
 511    false otherwise. */
 512 static bool
 513 is_coercible (atom_type required_type, union any_node *const *node)
 514 {
 515   return type_coercion_core (NULL, required_type,
 516                              (union any_node **) node, NULL, false);
 517 }
 518
 519 /* Returns true if ACTUAL_TYPE is a kind of REQUIRED_TYPE, false
 520    otherwise. */
 521 static bool
 522 is_compatible (atom_type required_type, atom_type actual_type)
 523 {
 524   return (required_type == actual_type
 525           || (required_type == OP_var
 526               && (actual_type == OP_num_var || actual_type == OP_str_var)));
 527 }
 528
 529 /* How to parse an operator. */
 530 struct operator
 531   {
 532     int token;                  /* Token representing operator. */
 533     operation_type type;        /* Operation type representing operation. */
 534     const char *name;           /* Name of operator. */
 535   };
 536
 537 /* Attempts to match the current token against the tokens for the
 538    OP_CNT operators in OPS[].  If successful, returns true
 539    and, if OPERATOR is non-null, sets *OPERATOR to the operator.
 540    On failure, returns false and, if OPERATOR is non-null, sets
 541    *OPERATOR to a null pointer. */
 542 static bool
 543 match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops,
 544                 const struct operator **operator)
 545 {
 546   const struct operator *op;
 547
 548   for (op = ops; op < ops + n_ops; op++)
 549     if (lex_token (lexer) == op->token)
 550       {
 551         if (op->token != T_NEG_NUM)
 552           lex_get (lexer);
 553         if (operator != NULL)
 554           *operator = op;
 555         return true;
 556       }
 557   if (operator != NULL)
 558     *operator = NULL;
 559   return false;
 560 }
 561
 562 static bool
 563 check_operator (const struct operator *op, int n_args, atom_type arg_type)
 564 {
 565   const struct operation *o;
 566   size_t i;
 567
 568   assert (op != NULL);
 569   o = &operations[op->type];
 570   assert (o->n_args == n_args);
 571   assert ((o->flags & OPF_ARRAY_OPERAND) == 0);
 572   for (i = 0; i < n_args; i++)
 573     assert (is_compatible (arg_type, o->args[i]));
 574   return true;
 575 }
 576
 577 static bool
 578 check_binary_operators (const struct operator ops[], size_t n_ops,
 579                         atom_type arg_type)
 580 {
 581   size_t i;
 582
 583   for (i = 0; i < n_ops; i++)
 584     check_operator (&ops[i], 2, arg_type);
 585   return true;
 586 }
 587
 588 static atom_type
 589 get_operand_type (const struct operator *op)
 590 {
 591   return operations[op->type].args[0];
 592 }
 593
 594 /* Parses a chain of left-associative operator/operand pairs.
 595    There are OP_CNT operators, specified in OPS[].  The
 596    operators' operands must all be the same type.  The next
 597    higher level is parsed by PARSE_NEXT_LEVEL.  If CHAIN_WARNING
 598    is non-null, then it will be issued as a warning if more than
 599    one operator/operand pair is parsed. */
 600 static union any_node *
 601 parse_binary_operators (struct lexer *lexer, struct expression *e, union any_node *node,
 602                         const struct operator ops[], size_t n_ops,
 603                         parse_recursively_func *parse_next_level,
 604                         const char *chain_warning)
 605 {
 606   atom_type operand_type = get_operand_type (&ops[0]);
 607   int op_count;
 608   const struct operator *operator;
 609
 610   assert (check_binary_operators (ops, n_ops, operand_type));
 611   if (node == NULL)
 612     return node;
 613
 614   for (op_count = 0; match_operator (lexer, ops, n_ops, &operator); op_count++)
 615     {
 616       union any_node *rhs;
 617
 618       /* Convert the left-hand side to type OPERAND_TYPE. */
 619       if (!type_coercion (e, operand_type, &node, operator->name))
 620         return NULL;
 621
 622       /* Parse the right-hand side and coerce to type
 623          OPERAND_TYPE. */
 624       rhs = parse_next_level (lexer, e);
 625       if (!type_coercion (e, operand_type, &rhs, operator->name))
 626         return NULL;
 627       node = expr_allocate_binary (e, operator->type, node, rhs);
 628     }
 629
 630   if (op_count > 1 && chain_warning != NULL)
 631     msg (SW, "%s", chain_warning);
 632
 633   return node;
 634 }
 635
 636 static union any_node *
 637 parse_inverting_unary_operator (struct lexer *lexer, struct expression *e,
 638                                 const struct operator *op,
 639                                 parse_recursively_func *parse_next_level)
 640 {
 641   union any_node *node;
 642   unsigned op_count;
 643
 644   check_operator (op, 1, get_operand_type (op));
 645
 646   op_count = 0;
 647   while (match_operator (lexer, op, 1, NULL))
 648     op_count++;
 649
 650   node = parse_next_level (lexer, e);
 651   if (op_count > 0
 652       && type_coercion (e, get_operand_type (op), &node, op->name)
 653       && op_count % 2 != 0)
 654     return expr_allocate_unary (e, op->type, node);
 655   else
 656     return node;
 657 }
 658
 659 /* Parses the OR level. */
 660 static union any_node *
 661 parse_or (struct lexer *lexer, struct expression *e)
 662 {
 663   static const struct operator op =
 664     { T_OR, OP_OR, "logical disjunction (`OR')" };
 665
 666   return parse_binary_operators (lexer, e, parse_and (lexer, e), &op, 1, parse_and, NULL);
 667 }
 668
 669 /* Parses the AND level. */
 670 static union any_node *
 671 parse_and (struct lexer *lexer, struct expression *e)
 672 {
 673   static const struct operator op =
 674     { T_AND, OP_AND, "logical conjunction (`AND')" };
 675
 676   return parse_binary_operators (lexer, e, parse_not (lexer, e),
 677                                  &op, 1, parse_not, NULL);
 678 }
 679
 680 /* Parses the NOT level. */
 681 static union any_node *
 682 parse_not (struct lexer *lexer, struct expression *e)
 683 {
 684   static const struct operator op
 685     = { T_NOT, OP_NOT, "logical negation (`NOT')" };
 686   return parse_inverting_unary_operator (lexer, e, &op, parse_rel);
 687 }
 688
 689 /* Parse relational operators. */
 690 static union any_node *
 691 parse_rel (struct lexer *lexer, struct expression *e)
 692 {
 693   const char *chain_warning =
 694     _("Chaining relational operators (e.g. `a < b < c') will "
 695       "not produce the mathematically expected result.  "
 696       "Use the AND logical operator to fix the problem "
 697       "(e.g. `a < b AND b < c').  "
 698       "If chaining is really intended, parentheses will disable "
 699       "this warning (e.g. `(a < b) < c'.)");
 700
 701   union any_node *node = parse_add (lexer, e);
 702
 703   if (node == NULL)
 704     return NULL;
 705
 706   switch (expr_node_returns (node))
 707     {
 708     case OP_number:
 709     case OP_boolean:
 710       {
 711         static const struct operator ops[] =
 712           {
 713             { T_EQUALS, OP_EQ, "numeric equality (`=')" },
 714             { T_EQ, OP_EQ, "numeric equality (`EQ')" },
 715             { T_GE, OP_GE, "numeric greater-than-or-equal-to (`>=')" },
 716             { T_GT, OP_GT, "numeric greater than (`>')" },
 717             { T_LE, OP_LE, "numeric less-than-or-equal-to (`<=')" },
 718             { T_LT, OP_LT, "numeric less than (`<')" },
 719             { T_NE, OP_NE, "numeric inequality (`<>')" },
 720           };
 721
 722         return parse_binary_operators (lexer, e, node, ops,
 723                                        sizeof ops / sizeof *ops,
 724                                        parse_add, chain_warning);
 725       }
 726
 727     case OP_string:
 728       {
 729         static const struct operator ops[] =
 730           {
 731             { T_EQUALS, OP_EQ_STRING, "string equality (`=')" },
 732             { T_EQ, OP_EQ_STRING, "string equality (`EQ')" },
 733             { T_GE, OP_GE_STRING, "string greater-than-or-equal-to (`>=')" },
 734             { T_GT, OP_GT_STRING, "string greater than (`>')" },
 735             { T_LE, OP_LE_STRING, "string less-than-or-equal-to (`<=')" },
 736             { T_LT, OP_LT_STRING, "string less than (`<')" },
 737             { T_NE, OP_NE_STRING, "string inequality (`<>')" },
 738           };
 739
 740         return parse_binary_operators (lexer, e, node, ops,
 741                                        sizeof ops / sizeof *ops,
 742                                        parse_add, chain_warning);
 743       }
 744
 745     default:
 746       return node;
 747     }
 748 }
 749
 750 /* Parses the addition and subtraction level. */
 751 static union any_node *
 752 parse_add (struct lexer *lexer, struct expression *e)
 753 {
 754   static const struct operator ops[] =
 755     {
 756       { T_PLUS, OP_ADD, "addition (`+')" },
 757       { T_DASH, OP_SUB, "subtraction (`-')" },
 758       { T_NEG_NUM, OP_ADD, "subtraction (`-')" },
 759     };
 760
 761   return parse_binary_operators (lexer, e, parse_mul (lexer, e),
 762                                  ops, sizeof ops / sizeof *ops,
 763                                  parse_mul, NULL);
 764 }
 765
 766 /* Parses the multiplication and division level. */
 767 static union any_node *
 768 parse_mul (struct lexer *lexer, struct expression *e)
 769 {
 770   static const struct operator ops[] =
 771     {
 772       { T_ASTERISK, OP_MUL, "multiplication (`*')" },
 773       { T_SLASH, OP_DIV, "division (`/')" },
 774     };
 775
 776   return parse_binary_operators (lexer, e, parse_neg (lexer, e),
 777                                  ops, sizeof ops / sizeof *ops,
 778                                  parse_neg, NULL);
 779 }
 780
 781 /* Parses the unary minus level. */
 782 static union any_node *
 783 parse_neg (struct lexer *lexer, struct expression *e)
 784 {
 785   static const struct operator op = { T_DASH, OP_NEG, "negation (`-')" };
 786   return parse_inverting_unary_operator (lexer, e, &op, parse_exp);
 787 }
 788
 789 static union any_node *
 790 parse_exp (struct lexer *lexer, struct expression *e)
 791 {
 792   static const struct operator op =
 793     { T_EXP, OP_POW, "exponentiation (`**')" };
 794
 795   const char *chain_warning =
 796     _("The exponentiation operator (`**') is left-associative, "
 797       "even though right-associative semantics are more useful.  "
 798       "That is, `a**b**c' equals `(a**b)**c', not as `a**(b**c)'.  "
 799       "To disable this warning, insert parentheses.");
 800
 801   union any_node *lhs, *node;
 802   bool negative = false;
 803
 804   if (lex_token (lexer) == T_NEG_NUM)
 805     {
 806       lhs = expr_allocate_number (e, -lex_tokval (lexer));
 807       negative = true;
 808       lex_get (lexer);
 809     }
 810   else
 811     lhs = parse_primary (lexer, e);
 812
 813   node = parse_binary_operators (lexer, e, lhs, &op, 1,
 814                                   parse_primary, chain_warning);
 815   return negative ? expr_allocate_unary (e, OP_NEG, node) : node;
 816 }
 817
 818 /* Parses system variables. */
 819 static union any_node *
 820 parse_sysvar (struct lexer *lexer, struct expression *e)
 821 {
 822   if (lex_match_id (lexer, "$CASENUM"))
 823     return expr_allocate_nullary (e, OP_CASENUM);
 824   else if (lex_match_id (lexer, "$DATE"))
 825     {
 826       static const char *months[12] =
 827         {
 828           "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
 829           "JUL", "AUG", "SEP", "OCT", "NOV", "DEC",
 830         };
 831
 832       time_t last_proc_time = time_of_last_procedure (e->ds);
 833       struct tm *time;
 834       char temp_buf[10];
 835       struct substring s;
 836
 837       time = localtime (&last_proc_time);
 838       sprintf (temp_buf, "%02d %s %02d", abs (time->tm_mday) % 100,
 839                months[abs (time->tm_mon) % 12], abs (time->tm_year) % 100);
 840
 841       ss_alloc_substring (&s, ss_cstr (temp_buf));
 842       return expr_allocate_string (e, s);
 843     }
 844   else if (lex_match_id (lexer, "$TRUE"))
 845     return expr_allocate_boolean (e, 1.0);
 846   else if (lex_match_id (lexer, "$FALSE"))
 847     return expr_allocate_boolean (e, 0.0);
 848   else if (lex_match_id (lexer, "$SYSMIS"))
 849     return expr_allocate_number (e, SYSMIS);
 850   else if (lex_match_id (lexer, "$JDATE"))
 851     {
 852       time_t time = time_of_last_procedure (e->ds);
 853       struct tm *tm = localtime (&time);
 854       return expr_allocate_number (e, expr_ymd_to_ofs (tm->tm_year + 1900,
 855                                                        tm->tm_mon + 1,
 856                                                        tm->tm_mday));
 857     }
 858   else if (lex_match_id (lexer, "$TIME"))
 859     {
 860       time_t time = time_of_last_procedure (e->ds);
 861       struct tm *tm = localtime (&time);
 862       return expr_allocate_number (e,
 863                                    expr_ymd_to_date (tm->tm_year + 1900,
 864                                                      tm->tm_mon + 1,
 865                                                      tm->tm_mday)
 866                                    + tm->tm_hour * 60 * 60.
 867                                    + tm->tm_min * 60.
 868                                    + tm->tm_sec);
 869     }
 870   else if (lex_match_id (lexer, "$LENGTH"))
 871     return expr_allocate_number (e, settings_get_viewlength ());
 872   else if (lex_match_id (lexer, "$WIDTH"))
 873     return expr_allocate_number (e, settings_get_viewwidth ());
 874   else
 875     {
 876       msg (SE, _("Unknown system variable %s."), lex_tokcstr (lexer));
 877       return NULL;
 878     }
 879 }
 880
 881 /* Parses numbers, varnames, etc. */
 882 static union any_node *
 883 parse_primary (struct lexer *lexer, struct expression *e)
 884 {
 885   switch (lex_token (lexer))
 886     {
 887     case T_ID:
 888       if (lex_next_token (lexer, 1) == T_LPAREN)
 889         {
 890           /* An identifier followed by a left parenthesis may be
 891              a vector element reference.  If not, it's a function
 892              call. */
 893           if (e->ds != NULL && dict_lookup_vector (dataset_dict (e->ds), lex_tokcstr (lexer)) != NULL)
 894             return parse_vector_element (lexer, e);
 895           else
 896             return parse_function (lexer, e);
 897         }
 898       else if (lex_tokcstr (lexer)[0] == '$')
 899         {
 900           /* $ at the beginning indicates a system variable. */
 901           return parse_sysvar (lexer, e);
 902         }
 903       else if (e->ds != NULL && dict_lookup_var (dataset_dict (e->ds), lex_tokcstr (lexer)))
 904         {
 905           /* It looks like a user variable.
 906              (It could be a format specifier, but we'll assume
 907              it's a variable unless proven otherwise. */
 908           return allocate_unary_variable (e, parse_variable (lexer, dataset_dict (e->ds)));
 909         }
 910       else
 911         {
 912           /* Try to parse it as a format specifier. */
 913           struct fmt_spec fmt;
 914           bool ok;
 915
 916           msg_disable ();
 917           ok = parse_format_specifier (lexer, &fmt);
 918           msg_enable ();
 919
 920           if (ok)
 921             return expr_allocate_format (e, &fmt);
 922
 923           /* All attempts failed. */
 924           msg (SE, _("Unknown identifier %s."), lex_tokcstr (lexer));
 925           return NULL;
 926         }
 927       break;
 928
 929     case T_POS_NUM:
 930     case T_NEG_NUM:
 931       {
 932         union any_node *node = expr_allocate_number (e, lex_tokval (lexer));
 933         lex_get (lexer);
 934         return node;
 935       }
 936
 937     case T_STRING:
 938       {
 939         const char *dict_encoding;
 940         union any_node *node;
 941         char *s;
 942
 943         dict_encoding = (e->ds != NULL
 944                          ? dict_get_encoding (dataset_dict (e->ds))
 945                          : "UTF-8");
 946         s = recode_string_pool (dict_encoding, "UTF-8", lex_tokcstr (lexer),
 947                            ss_length (lex_tokss (lexer)), e->expr_pool);
 948         node = expr_allocate_string (e, ss_cstr (s));
 949
 950         lex_get (lexer);
 951         return node;
 952       }
 953
 954     case T_LPAREN:
 955       {
 956         /* Count number of left parentheses so that we can match them against
 957            an equal number of right parentheses.  This defeats trivial attempts
 958            to exhaust the stack with a lot of left parentheses.  (More
 959            sophisticated attacks will still succeed.) */
 960         size_t n = 0;
 961         while (lex_match (lexer, T_LPAREN))
 962           n++;
 963
 964         union any_node *node = parse_or (lexer, e);
 965         if (!node)
 966           return NULL;
 967
 968         for (size_t i = 0; i < n; i++)
 969           if (!lex_force_match (lexer, T_RPAREN))
 970             return NULL;
 971
 972         return node;
 973       }
 974
 975     default:
 976       lex_error (lexer, NULL);
 977       return NULL;
 978     }
 979 }
 980
 981 static union any_node *
 982 parse_vector_element (struct lexer *lexer, struct expression *e)
 983 {
 984   const struct vector *vector;
 985   union any_node *element;
 986
 987   /* Find vector, skip token.
 988      The caller must already have verified that the current token
 989      is the name of a vector. */
 990   vector = dict_lookup_vector (dataset_dict (e->ds), lex_tokcstr (lexer));
 991   assert (vector != NULL);
 992   lex_get (lexer);
 993
 994   /* Skip left parenthesis token.
 995      The caller must have verified that the lookahead is a left
 996      parenthesis. */
 997   assert (lex_token (lexer) == T_LPAREN);
 998   lex_get (lexer);
 999
1000   element = parse_or (lexer, e);
1001   if (!type_coercion (e, OP_number, &element, "vector indexing")
1002       || !lex_match (lexer, T_RPAREN))
1003     return NULL;
1004
1005   return expr_allocate_binary (e, (vector_get_type (vector) == VAL_NUMERIC
1006                                    ? OP_VEC_ELEM_NUM : OP_VEC_ELEM_STR),
1007                                element, expr_allocate_vector (e, vector));
1008 }
1009 \f
1010 /* Individual function parsing. */
1011
1012 const struct operation operations[OP_first + n_OP] = {
1013 #include "parse.inc"
1014 };
1015
1016 static bool
1017 word_matches (const char **test, const char **name)
1018 {
1019   size_t test_len = strcspn (*test, ".");
1020   size_t name_len = strcspn (*name, ".");
1021   if (test_len == name_len)
1022     {
1023       if (buf_compare_case (*test, *name, test_len))
1024         return false;
1025     }
1026   else if (test_len < 3 || test_len > name_len)
1027     return false;
1028   else
1029     {
1030       if (buf_compare_case (*test, *name, test_len))
1031         return false;
1032     }
1033
1034   *test += test_len;
1035   *name += name_len;
1036   if (**test != **name)
1037     return false;
1038
1039   if (**test == '.')
1040     {
1041       (*test)++;
1042       (*name)++;
1043     }
1044   return true;
1045 }
1046
1047 /* Returns 0 if TOKEN and FUNC do not match,
1048    1 if TOKEN is an acceptable abbreviation for FUNC,
1049    2 if TOKEN equals FUNC. */
1050 static int
1051 compare_function_names (const char *token_, const char *func_)
1052 {
1053   const char *token = token_;
1054   const char *func = func_;
1055   while (*token || *func)
1056     if (!word_matches (&token, &func))
1057       return 0;
1058   return !c_strcasecmp (token_, func_) ? 2 : 1;
1059 }
1060
1061 static bool
1062 lookup_function (const char *token,
1063                  const struct operation **first,
1064                  const struct operation **last)
1065 {
1066   *first = *last = NULL;
1067   const struct operation *best = NULL;
1068
1069   for (const struct operation *f = operations + OP_function_first;
1070        f <= operations + OP_function_last; f++)
1071     {
1072       int score = compare_function_names (token, f->name);
1073       if (score == 2)
1074         {
1075           best = f;
1076           break;
1077         }
1078       else if (score == 1 && !(f->flags & OPF_NO_ABBREV) && !best)
1079         best = f;
1080     }
1081
1082   if (!best)
1083     return false;
1084
1085   *first = best;
1086
1087   const struct operation *f = best;
1088   while (f <= operations + OP_function_last
1089          && !c_strcasecmp (f->name, best->name))
1090     f++;
1091   *last = f;
1092
1093   return true;
1094 }
1095
1096 static int
1097 extract_min_valid (const char *s)
1098 {
1099   char *p = strrchr (s, '.');
1100   if (p == NULL
1101       || p[1] < '0' || p[1] > '9'
1102       || strspn (p + 1, "0123456789") != strlen (p + 1))
1103     return -1;
1104   *p = '\0';
1105   return atoi (p + 1);
1106 }
1107
1108 static atom_type
1109 function_arg_type (const struct operation *f, size_t arg_idx)
1110 {
1111   assert (arg_idx < f->n_args || (f->flags & OPF_ARRAY_OPERAND));
1112
1113   return f->args[arg_idx < f->n_args ? arg_idx : f->n_args - 1];
1114 }
1115
1116 static bool
1117 match_function (union any_node **args, int n_args, const struct operation *f)
1118 {
1119   size_t i;
1120
1121   if (n_args < f->n_args
1122       || (n_args > f->n_args && (f->flags & OPF_ARRAY_OPERAND) == 0)
1123       || n_args - (f->n_args - 1) < f->array_min_elems)
1124     return false;
1125
1126   for (i = 0; i < n_args; i++)
1127     if (!is_coercible (function_arg_type (f, i), &args[i]))
1128       return false;
1129
1130   return true;
1131 }
1132
1133 static void
1134 coerce_function_args (struct expression *e, const struct operation *f,
1135                       union any_node **args, size_t n_args)
1136 {
1137   int i;
1138
1139   for (i = 0; i < n_args; i++)
1140     type_coercion_assert (e, function_arg_type (f, i), &args[i]);
1141 }
1142
1143 static bool
1144 validate_function_args (const struct operation *f, int n_args, int min_valid)
1145 {
1146   /* Count the function arguments that go into the trailing array (if any).  We
1147      know that there must be at least the minimum number because
1148      match_function() already checked. */
1149   int array_n_args = n_args - (f->n_args - 1);
1150   assert (array_n_args >= f->array_min_elems);
1151
1152   if ((f->flags & OPF_ARRAY_OPERAND)
1153       && array_n_args % f->array_granularity != 0)
1154     {
1155       /* RANGE is the only case we have so far.  It has paired arguments with
1156          one initial argument, and that's the only special case we deal with
1157          here. */
1158       assert (f->array_granularity == 2);
1159       assert (n_args % 2 == 0);
1160       msg (SE, _("%s must have an odd number of arguments."), f->prototype);
1161       return false;
1162     }
1163
1164   if (min_valid != -1)
1165     {
1166       if (f->array_min_elems == 0)
1167         {
1168           assert ((f->flags & OPF_MIN_VALID) == 0);
1169           msg (SE, _("%s function cannot accept suffix .%d to specify the "
1170                      "minimum number of valid arguments."),
1171                f->prototype, min_valid);
1172           return false;
1173         }
1174       else
1175         {
1176           assert (f->flags & OPF_MIN_VALID);
1177           if (min_valid > array_n_args)
1178             {
1179               msg (SE, _("For %s with %d arguments, at most %d (not %d) may be "
1180                          "required to be valid."),
1181                    f->prototype, n_args, array_n_args, min_valid);
1182               return false;
1183             }
1184         }
1185     }
1186
1187   return true;
1188 }
1189
1190 static void
1191 add_arg (union any_node ***args, int *n_args, int *allocated_args,
1192          union any_node *arg)
1193 {
1194   if (*n_args >= *allocated_args)
1195     {
1196       *allocated_args += 8;
1197       *args = xrealloc (*args, sizeof **args * *allocated_args);
1198     }
1199
1200   (*args)[(*n_args)++] = arg;
1201 }
1202
1203 static void
1204 put_invocation (struct string *s,
1205                 const char *func_name, union any_node **args, size_t n_args)
1206 {
1207   size_t i;
1208
1209   ds_put_format (s, "%s(", func_name);
1210   for (i = 0; i < n_args; i++)
1211     {
1212       if (i > 0)
1213         ds_put_cstr (s, ", ");
1214       ds_put_cstr (s, operations[expr_node_returns (args[i])].prototype);
1215     }
1216   ds_put_byte (s, ')');
1217 }
1218
1219 static void
1220 no_match (const char *func_name,
1221           union any_node **args, size_t n_args,
1222           const struct operation *first, const struct operation *last)
1223 {
1224   struct string s;
1225   const struct operation *f;
1226
1227   ds_init_empty (&s);
1228
1229   if (last - first == 1)
1230     {
1231       ds_put_format (&s, _("Type mismatch invoking %s as "), first->prototype);
1232       put_invocation (&s, func_name, args, n_args);
1233     }
1234   else
1235     {
1236       ds_put_cstr (&s, _("Function invocation "));
1237       put_invocation (&s, func_name, args, n_args);
1238       ds_put_cstr (&s, _(" does not match any known function.  Candidates are:"));
1239
1240       for (f = first; f < last; f++)
1241         ds_put_format (&s, "\n%s", f->prototype);
1242     }
1243   ds_put_byte (&s, '.');
1244
1245   msg (SE, "%s", ds_cstr (&s));
1246
1247   ds_destroy (&s);
1248 }
1249
1250 static union any_node *
1251 parse_function (struct lexer *lexer, struct expression *e)
1252 {
1253   int min_valid;
1254   const struct operation *f, *first, *last;
1255
1256   union any_node **args = NULL;
1257   int n_args = 0;
1258   int allocated_args = 0;
1259
1260   struct string func_name;
1261
1262   union any_node *n;
1263
1264   ds_init_substring (&func_name, lex_tokss (lexer));
1265   min_valid = extract_min_valid (lex_tokcstr (lexer));
1266   if (!lookup_function (lex_tokcstr (lexer), &first, &last))
1267     {
1268       msg (SE, _("No function or vector named %s."), lex_tokcstr (lexer));
1269       ds_destroy (&func_name);
1270       return NULL;
1271     }
1272
1273   lex_get (lexer);
1274   if (!lex_force_match (lexer, T_LPAREN))
1275     {
1276       ds_destroy (&func_name);
1277       return NULL;
1278     }
1279
1280   args = NULL;
1281   n_args = allocated_args = 0;
1282   if (lex_token (lexer) != T_RPAREN)
1283     for (;;)
1284       {
1285         if (lex_token (lexer) == T_ID
1286             && lex_next_token (lexer, 1) == T_TO)
1287           {
1288             const struct variable **vars;
1289             size_t n_vars;
1290             size_t i;
1291
1292             if (!parse_variables_const (lexer, dataset_dict (e->ds), &vars, &n_vars, PV_SINGLE))
1293               goto fail;
1294             for (i = 0; i < n_vars; i++)
1295               add_arg (&args, &n_args, &allocated_args,
1296                        allocate_unary_variable (e, vars[i]));
1297             free (vars);
1298           }
1299         else
1300           {
1301             union any_node *arg = parse_or (lexer, e);
1302             if (arg == NULL)
1303               goto fail;
1304
1305             add_arg (&args, &n_args, &allocated_args, arg);
1306           }
1307         if (lex_match (lexer, T_RPAREN))
1308           break;
1309         else if (!lex_match (lexer, T_COMMA))
1310           {
1311             lex_error_expecting (lexer, "`,'", "`)'");
1312             goto fail;
1313           }
1314       }
1315
1316   for (f = first; f < last; f++)
1317     if (match_function (args, n_args, f))
1318       break;
1319   if (f >= last)
1320     {
1321       no_match (ds_cstr (&func_name), args, n_args, first, last);
1322       goto fail;
1323     }
1324
1325   coerce_function_args (e, f, args, n_args);
1326   if (!validate_function_args (f, n_args, min_valid))
1327     goto fail;
1328
1329   if ((f->flags & OPF_EXTENSION) && settings_get_syntax () == COMPATIBLE)
1330     msg (SW, _("%s is a PSPP extension."), f->prototype);
1331   if (f->flags & OPF_UNIMPLEMENTED)
1332     {
1333       msg (SE, _("%s is not available in this version of PSPP."),
1334            f->prototype);
1335       goto fail;
1336     }
1337   if ((f->flags & OPF_PERM_ONLY) &&
1338       proc_in_temporary_transformations (e->ds))
1339     {
1340       msg (SE, _("%s may not appear after %s."), f->prototype, "TEMPORARY");
1341       goto fail;
1342     }
1343
1344   n = expr_allocate_composite (e, f - operations, args, n_args);
1345   n->composite.min_valid = min_valid != -1 ? min_valid : f->array_min_elems;
1346
1347   if (n->type == OP_LAG_Vn || n->type == OP_LAG_Vs)
1348     dataset_need_lag (e->ds, 1);
1349   else if (n->type == OP_LAG_Vnn || n->type == OP_LAG_Vsn)
1350     {
1351       int n_before;
1352       assert (n->composite.n_args == 2);
1353       assert (n->composite.args[1]->type == OP_pos_int);
1354       n_before = n->composite.args[1]->integer.i;
1355       dataset_need_lag (e->ds, n_before);
1356     }
1357
1358   free (args);
1359   ds_destroy (&func_name);
1360   return n;
1361
1362 fail:
1363   free (args);
1364   ds_destroy (&func_name);
1365   return NULL;
1366 }
1367 \f
1368 /* Utility functions. */
1369
1370 static struct expression *
1371 expr_create (struct dataset *ds)
1372 {
1373   struct pool *pool = pool_create ();
1374   struct expression *e = pool_alloc (pool, sizeof *e);
1375   e->expr_pool = pool;
1376   e->ds = ds;
1377   e->eval_pool = pool_create_subpool (e->expr_pool);
1378   e->ops = NULL;
1379   e->op_types = NULL;
1380   e->n_ops = e->allocated_ops = 0;
1381   return e;
1382 }
1383
1384 atom_type
1385 expr_node_returns (const union any_node *n)
1386 {
1387   assert (n != NULL);
1388   assert (is_operation (n->type));
1389   if (is_atom (n->type))
1390     return n->type;
1391   else if (is_composite (n->type))
1392     return operations[n->type].returns;
1393   else
1394     NOT_REACHED ();
1395 }
1396
1397 static const char *
1398 atom_type_name (atom_type type)
1399 {
1400   assert (is_atom (type));
1401   return operations[type].name;
1402 }
1403
1404 union any_node *
1405 expr_allocate_nullary (struct expression *e, operation_type op)
1406 {
1407   return expr_allocate_composite (e, op, NULL, 0);
1408 }
1409
1410 union any_node *
1411 expr_allocate_unary (struct expression *e, operation_type op,
1412                      union any_node *arg0)
1413 {
1414   return expr_allocate_composite (e, op, &arg0, 1);
1415 }
1416
1417 union any_node *
1418 expr_allocate_binary (struct expression *e, operation_type op,
1419                       union any_node *arg0, union any_node *arg1)
1420 {
1421   union any_node *args[2];
1422   args[0] = arg0;
1423   args[1] = arg1;
1424   return expr_allocate_composite (e, op, args, 2);
1425 }
1426
1427 static bool
1428 is_valid_node (union any_node *n)
1429 {
1430   const struct operation *op;
1431   size_t i;
1432
1433   assert (n != NULL);
1434   assert (is_operation (n->type));
1435   op = &operations[n->type];
1436
1437   if (!is_atom (n->type))
1438     {
1439       struct composite_node *c = &n->composite;
1440
1441       assert (is_composite (n->type));
1442       assert (c->n_args >= op->n_args);
1443       for (i = 0; i < op->n_args; i++)
1444         assert (is_compatible (op->args[i], expr_node_returns (c->args[i])));
1445       if (c->n_args > op->n_args && !is_operator (n->type))
1446         {
1447           assert (op->flags & OPF_ARRAY_OPERAND);
1448           for (i = 0; i < c->n_args; i++)
1449             assert (is_compatible (op->args[op->n_args - 1],
1450                                    expr_node_returns (c->args[i])));
1451         }
1452     }
1453
1454   return true;
1455 }
1456
1457 union any_node *
1458 expr_allocate_composite (struct expression *e, operation_type op,
1459                          union any_node **args, size_t n_args)
1460 {
1461   union any_node *n;
1462   size_t i;
1463
1464   n = pool_alloc (e->expr_pool, sizeof n->composite);
1465   n->type = op;
1466   n->composite.n_args = n_args;
1467   n->composite.args = pool_alloc (e->expr_pool,
1468                                   sizeof *n->composite.args * n_args);
1469   for (i = 0; i < n_args; i++)
1470     {
1471       if (args[i] == NULL)
1472         return NULL;
1473       n->composite.args[i] = args[i];
1474     }
1475   memcpy (n->composite.args, args, sizeof *n->composite.args * n_args);
1476   n->composite.min_valid = 0;
1477   assert (is_valid_node (n));
1478   return n;
1479 }
1480
1481 union any_node *
1482 expr_allocate_number (struct expression *e, double d)
1483 {
1484   union any_node *n = pool_alloc (e->expr_pool, sizeof n->number);
1485   n->type = OP_number;
1486   n->number.n = d;
1487   return n;
1488 }
1489
1490 union any_node *
1491 expr_allocate_boolean (struct expression *e, double b)
1492 {
1493   union any_node *n = pool_alloc (e->expr_pool, sizeof n->number);
1494   assert (b == 0.0 || b == 1.0 || b == SYSMIS);
1495   n->type = OP_boolean;
1496   n->number.n = b;
1497   return n;
1498 }
1499
1500 union any_node *
1501 expr_allocate_integer (struct expression *e, int i)
1502 {
1503   union any_node *n = pool_alloc (e->expr_pool, sizeof n->integer);
1504   n->type = OP_integer;
1505   n->integer.i = i;
1506   return n;
1507 }
1508
1509 union any_node *
1510 expr_allocate_pos_int (struct expression *e, int i)
1511 {
1512   union any_node *n = pool_alloc (e->expr_pool, sizeof n->integer);
1513   assert (i > 0);
1514   n->type = OP_pos_int;
1515   n->integer.i = i;
1516   return n;
1517 }
1518
1519 union any_node *
1520 expr_allocate_vector (struct expression *e, const struct vector *vector)
1521 {
1522   union any_node *n = pool_alloc (e->expr_pool, sizeof n->vector);
1523   n->type = OP_vector;
1524   n->vector.v = vector;
1525   return n;
1526 }
1527
1528 union any_node *
1529 expr_allocate_string (struct expression *e, struct substring s)
1530 {
1531   union any_node *n = pool_alloc (e->expr_pool, sizeof n->string);
1532   n->type = OP_string;
1533   n->string.s = s;
1534   return n;
1535 }
1536
1537 union any_node *
1538 expr_allocate_variable (struct expression *e, const struct variable *v)
1539 {
1540   union any_node *n = pool_alloc (e->expr_pool, sizeof n->variable);
1541   n->type = var_is_numeric (v) ? OP_num_var : OP_str_var;
1542   n->variable.v = v;
1543   return n;
1544 }
1545
1546 union any_node *
1547 expr_allocate_format (struct expression *e, const struct fmt_spec *format)
1548 {
1549   union any_node *n = pool_alloc (e->expr_pool, sizeof n->format);
1550   n->type = OP_format;
1551   n->format.f = *format;
1552   return n;
1553 }
1554
1555 /* Allocates a unary composite node that represents the value of
1556    variable V in expression E. */
1557 static union any_node *
1558 allocate_unary_variable (struct expression *e, const struct variable *v)
1559 {
1560   assert (v != NULL);
1561   return expr_allocate_unary (e, var_is_numeric (v) ? OP_NUM_VAR : OP_STR_VAR,
1562                               expr_allocate_variable (e, v));
1563 }
1564 \f
1565 /* Export function details to other modules. */
1566
1567 /* Returns the operation structure for the function with the
1568    given IDX. */
1569 const struct operation *
1570 expr_get_function (size_t idx)
1571 {
1572   assert (idx < n_OP_function);
1573   return &operations[OP_function_first + idx];
1574 }
1575
1576 /* Returns the number of expression functions. */
1577 size_t
1578 expr_get_n_functions (void)
1579 {
1580   return n_OP_function;
1581 }
1582
1583 /* Returns the name of operation OP. */
1584 const char *
1585 expr_operation_get_name (const struct operation *op)
1586 {
1587   return op->name;
1588 }
1589
1590 /* Returns the human-readable prototype for operation OP. */
1591 const char *
1592 expr_operation_get_prototype (const struct operation *op)
1593 {
1594   return op->prototype;
1595 }
1596
1597 /* Returns the number of arguments for operation OP. */
1598 int
1599 expr_operation_get_n_args (const struct operation *op)
1600 {
1601   return op->n_args;
1602 }