From 14dfdfa4c447272f3f8dc2203ba16e18ae72433c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 7 Mar 2021 11:33:00 -0800 Subject: [PATCH] Improve error message for creating a new string var with COMPUTE or IF. --- src/language/control/do-if.c | 2 +- src/language/control/loop.c | 7 +- src/language/data-io/inpt-pgm.c | 2 +- src/language/data-io/print-space.c | 2 +- src/language/expressions/parse.c | 114 +++++++++++++++++++--------- src/language/expressions/public.h | 28 +++---- src/language/xforms/compute.c | 35 ++++----- src/language/xforms/select-if.c | 2 +- tests/language/expressions/parse.at | 25 +++++- 9 files changed, 139 insertions(+), 78 deletions(-) diff --git a/src/language/control/do-if.c b/src/language/control/do-if.c index 83de0e39a3..ccda670658 100644 --- a/src/language/control/do-if.c +++ b/src/language/control/do-if.c @@ -201,7 +201,7 @@ parse_clause (struct lexer *lexer, struct do_if_trns *do_if, struct dataset *ds) { struct expression *condition; - condition = expr_parse (lexer, ds, EXPR_BOOLEAN); + condition = expr_parse_bool (lexer, NULL, ds); if (condition == NULL) return CMD_CASCADING_FAILURE; diff --git a/src/language/control/loop.c b/src/language/control/loop.c index bfd364ccd1..765d213147 100644 --- a/src/language/control/loop.c +++ b/src/language/control/loop.c @@ -194,7 +194,7 @@ parse_if_clause (struct lexer *lexer, return false; } - *condition = expr_parse_pool (lexer, loop->pool, loop->ds, EXPR_BOOLEAN); + *condition = expr_parse_bool (lexer, loop->pool, loop->ds); return *condition != NULL; } @@ -232,8 +232,7 @@ parse_index_clause (struct dataset *ds, struct lexer *lexer, if (!lex_force_match (lexer, T_EQUALS)) return false; - loop->first_expr = expr_parse_pool (lexer, loop->pool, - loop->ds, EXPR_NUMBER); + loop->first_expr = expr_parse (lexer, loop->pool, loop->ds, VAL_NUMERIC); if (loop->first_expr == NULL) return false; @@ -252,7 +251,7 @@ parse_index_clause (struct dataset *ds, struct lexer *lexer, lex_sbc_only_once (e == &loop->last_expr ? "TO" : "BY"); return false; } - *e = expr_parse_pool (lexer, loop->pool, loop->ds, EXPR_NUMBER); + *e = expr_parse (lexer, loop->pool, loop->ds, VAL_NUMERIC); if (*e == NULL) return false; } diff --git a/src/language/data-io/inpt-pgm.c b/src/language/data-io/inpt-pgm.c index d7523f58d2..e2cfbd1bb8 100644 --- a/src/language/data-io/inpt-pgm.c +++ b/src/language/data-io/inpt-pgm.c @@ -304,7 +304,7 @@ cmd_reread (struct lexer *lexer, struct dataset *ds) goto error; } - e = expr_parse (lexer, ds, EXPR_NUMBER); + e = expr_parse (lexer, NULL, ds, VAL_NUMERIC); if (!e) goto error; } diff --git a/src/language/data-io/print-space.c b/src/language/data-io/print-space.c index e166df5f63..f077e9c395 100644 --- a/src/language/data-io/print-space.c +++ b/src/language/data-io/print-space.c @@ -77,7 +77,7 @@ cmd_print_space (struct lexer *lexer, struct dataset *ds) if (lex_token (lexer) != T_ENDCMD) { - expr = expr_parse (lexer, ds, EXPR_NUMBER); + expr = expr_parse (lexer, NULL, ds, VAL_NUMERIC); if (lex_token (lexer) != T_ENDCMD) { lex_error (lexer, _("expecting end of command")); diff --git a/src/language/expressions/parse.c b/src/language/expressions/parse.c index 79889082ea..57307bf306 100644 --- a/src/language/expressions/parse.c +++ b/src/language/expressions/parse.c @@ -59,49 +59,98 @@ atom_type expr_node_returns (const union any_node *); static const char *atom_type_name (atom_type); static struct expression *finish_expression (union any_node *, struct expression *); -static bool type_check (struct expression *, union any_node **, - enum expr_type expected_type); +static bool type_check (const union any_node *, enum val_type expected_type); static union any_node *allocate_unary_variable (struct expression *, const struct variable *); /* Public functions. */ -/* Parses an expression of the given TYPE. - If DICT is nonnull then variables and vectors within it may be - referenced within the expression; otherwise, the expression - must not reference any variables or vectors. - Returns the new expression if successful or a null pointer - otherwise. */ +/* Parses an expression of the given TYPE. If DS is nonnull then variables and + vectors within it may be referenced within the expression; otherwise, the + expression must not reference any variables or vectors. Returns the new + expression if successful or a null pointer otherwise. If POOL is nonnull, + then destroying POOL will free the expression; otherwise, the caller must + eventually free it with expr_free(). */ struct expression * -expr_parse (struct lexer *lexer, struct dataset *ds, enum expr_type type) +expr_parse (struct lexer *lexer, struct pool *pool, struct dataset *ds, + enum val_type type) { - union any_node *n; - struct expression *e; + assert (val_type_is_valid (type)); - assert (type == EXPR_NUMBER || type == EXPR_STRING || type == EXPR_BOOLEAN); + struct expression *e = expr_create (ds); + union any_node *n = parse_or (lexer, e); + if (!n || !type_check (n, type)) + { + expr_free (e); + return NULL; + } - e = expr_create (ds); - n = parse_or (lexer, e); - if (n != NULL && type_check (e, &n, type)) - return finish_expression (expr_optimize (n, e), e); - else + e = finish_expression (expr_optimize (n, e), e); + if (pool) + pool_add_subpool (pool, e->expr_pool); + return e; +} + +/* Parses a boolean expression, otherwise similar to expr_parse(). */ +struct expression * +expr_parse_bool (struct lexer *lexer, struct pool *pool, struct dataset *ds) +{ + struct expression *e = expr_create (ds); + union any_node *n = parse_or (lexer, e); + if (!n) { expr_free (e); return NULL; } + + atom_type actual_type = expr_node_returns (n); + if (actual_type == OP_number) + n = expr_allocate_binary (e, OP_NUM_TO_BOOLEAN, n, + expr_allocate_string (e, ss_empty ())); + else if (actual_type != OP_boolean) + { + msg (SE, _("Type mismatch: expression has %s type, " + "but a boolean value is required here."), + atom_type_name (actual_type)); + expr_free (e); + return NULL; + } + + e = finish_expression (expr_optimize (n, e), e); + if (pool) + pool_add_subpool (pool, e->expr_pool); + return e; } -/* Parses and returns an expression of the given TYPE, as - expr_parse(), and sets up so that destroying POOL will free - the expression as well. */ +/* Parses a numeric expression that is intended to be assigned to newly created + variable NEW_VAR_NAME. (This allows for a better error message if the + expression is not numeric.) Otherwise similar to expr_parse(). */ struct expression * -expr_parse_pool (struct lexer *lexer, - struct pool *pool, - struct dataset *ds, - enum expr_type type) +expr_parse_new_variable (struct lexer *lexer, struct pool *pool, struct dataset *ds, + const char *new_var_name) { - struct expression *e = expr_parse (lexer, ds, type); - if (e != NULL) + struct expression *e = expr_create (ds); + union any_node *n = parse_or (lexer, e); + if (!n) + { + expr_free (e); + return NULL; + } + + atom_type actual_type = expr_node_returns (n); + if (actual_type != OP_number && actual_type != OP_boolean) + { + msg (SE, _("This command tries to create a new variable %s by assigning a " + "string value to it, but this is not supported. Use " + "the STRING command to create the new variable with the " + "correct width before assigning to it, e.g. STRING %s(A20)."), + new_var_name, new_var_name); + expr_free (e); + return NULL; + } + + e = finish_expression (expr_optimize (n, e), e); + if (pool) pool_add_subpool (pool, e->expr_pool); return e; } @@ -247,15 +296,13 @@ finish_expression (union any_node *n, struct expression *e) converted to type EXPECTED_TYPE, inserting a conversion at *N if necessary. Returns true if successful, false on failure. */ static bool -type_check (struct expression *e, - union any_node **n, enum expr_type expected_type) +type_check (const union any_node *n, enum val_type expected_type) { - atom_type actual_type = expr_node_returns (*n); + atom_type actual_type = expr_node_returns (n); switch (expected_type) { - case EXPR_BOOLEAN: - case EXPR_NUMBER: + case VAL_NUMERIC: if (actual_type != OP_number && actual_type != OP_boolean) { msg (SE, _("Type mismatch: expression has %s type, " @@ -263,12 +310,9 @@ type_check (struct expression *e, atom_type_name (actual_type)); return false; } - if (actual_type == OP_number && expected_type == EXPR_BOOLEAN) - *n = expr_allocate_binary (e, OP_NUM_TO_BOOLEAN, *n, - expr_allocate_string (e, ss_empty ())); break; - case EXPR_STRING: + case VAL_STRING: if (actual_type != OP_string) { msg (SE, _("Type mismatch: expression has %s type, " diff --git a/src/language/expressions/public.h b/src/language/expressions/public.h index 3bddb4b8a1..1059713065 100644 --- a/src/language/expressions/public.h +++ b/src/language/expressions/public.h @@ -19,27 +19,23 @@ #include -/* Expression parsing flags. */ -enum expr_type - { - EXPR_NUMBER = 0xf000, /* Number. */ - EXPR_STRING, /* String. */ - EXPR_BOOLEAN, /* Boolean (number limited to 0, 1, SYSMIS). */ - }; +#include "data/val-type.h" +struct ccase; +struct dataset; struct dictionary; struct expression; -struct ccase; +struct lexer; struct pool; union value; -struct dataset ; -struct lexer ; - -struct expression *expr_parse (struct lexer *lexer, struct dataset *, enum expr_type); -struct expression *expr_parse_pool (struct lexer *, - struct pool *, - struct dataset *, - enum expr_type); + +struct expression *expr_parse (struct lexer *lexer, struct pool *, + struct dataset *, enum val_type); +struct expression *expr_parse_bool (struct lexer *lexer, struct pool *, + struct dataset *); +struct expression *expr_parse_new_variable (struct lexer *lexer, struct pool *, + struct dataset *, + const char *new_var_name); void expr_free (struct expression *); struct dataset; diff --git a/src/language/xforms/compute.c b/src/language/xforms/compute.c index 00f7c5ad47..aa81729827 100644 --- a/src/language/xforms/compute.c +++ b/src/language/xforms/compute.c @@ -41,6 +41,18 @@ struct compute_trns; struct lvalue; +/* COMPUTE or IF target variable or vector element. + For a variable, the `variable' member is non-null. + For a vector element, the `vector' member is non-null. */ +struct lvalue + { + struct variable *variable; /* Destination variable. */ + bool is_new_variable; /* Did we create the variable? */ + + const struct vector *vector; /* Destination vector, if any, or NULL. */ + struct expression *element; /* Destination vector element, or NULL. */ + }; + /* Target of a COMPUTE or IF assignment, either a variable or a vector element. */ static struct lvalue *lvalue_parse (struct lexer *lexer, struct dataset *); @@ -237,7 +249,7 @@ cmd_if (struct lexer *lexer, struct dataset *ds) compute = compute_trns_create (); /* Test expression. */ - compute->test = expr_parse (lexer, ds, EXPR_BOOLEAN); + compute->test = expr_parse_bool (lexer, NULL, ds); if (compute->test == NULL) goto fail; @@ -284,9 +296,10 @@ static struct expression * parse_rvalue (struct lexer *lexer, const struct lvalue *lvalue, struct dataset *ds) { - bool is_numeric = lvalue_get_type (lvalue) == VAL_NUMERIC; - - return expr_parse (lexer, ds, is_numeric ? EXPR_NUMBER : EXPR_STRING); + if (lvalue->is_new_variable) + return expr_parse_new_variable (lexer, NULL, ds, var_get_name (lvalue->variable)); + else + return expr_parse (lexer, NULL, ds, lvalue_get_type (lvalue)); } /* Returns a new struct compute_trns after initializing its fields. */ @@ -318,18 +331,6 @@ compute_trns_free (void *compute_) return true; } -/* COMPUTE or IF target variable or vector element. - For a variable, the `variable' member is non-null. - For a vector element, the `vector' member is non-null. */ -struct lvalue - { - struct variable *variable; /* Destination variable. */ - bool is_new_variable; /* Did we create the variable? */ - - const struct vector *vector; /* Destination vector, if any, or NULL. */ - struct expression *element; /* Destination vector element, or NULL. */ - }; - /* Parses the target variable or vector element into a new `struct lvalue', which is returned. */ static struct lvalue * @@ -361,7 +362,7 @@ lvalue_parse (struct lexer *lexer, struct dataset *ds) lex_get (lexer); if (!lex_force_match (lexer, T_LPAREN)) goto lossage; - lvalue->element = expr_parse (lexer, ds, EXPR_NUMBER); + lvalue->element = expr_parse (lexer, NULL, ds, VAL_NUMERIC); if (lvalue->element == NULL) goto lossage; if (!lex_force_match (lexer, T_RPAREN)) diff --git a/src/language/xforms/select-if.c b/src/language/xforms/select-if.c index 4110f9089b..72284f3653 100644 --- a/src/language/xforms/select-if.c +++ b/src/language/xforms/select-if.c @@ -50,7 +50,7 @@ cmd_select_if (struct lexer *lexer, struct dataset *ds) struct expression *e; struct select_if_trns *t; - e = expr_parse (lexer, ds, EXPR_BOOLEAN); + e = expr_parse_bool (lexer, NULL, ds); if (!e) return CMD_CASCADING_FAILURE; diff --git a/tests/language/expressions/parse.at b/tests/language/expressions/parse.at index 8e4166c599..02b713f8b9 100644 --- a/tests/language/expressions/parse.at +++ b/tests/language/expressions/parse.at @@ -38,13 +38,23 @@ parse.sps:11: error: Stopping syntax file processing here to avoid a cascade of ]) AT_CLEANUP -AT_SETUP([parsing numeric expression with type mismatch]) +AT_SETUP([parsing boolean expression with type mismatch]) AT_DATA([parse.sps], [dnl DATA LIST NOTABLE/x 1(A). IF 'foo'. ]) AT_CHECK([pspp -O format=csv parse.sps], [1], [dnl -"parse.sps:2: error: IF: Type mismatch: expression has string type, but a numeric value is required here." +"parse.sps:2: error: IF: Type mismatch: expression has string type, but a boolean value is required here." +]) +AT_CLEANUP + +AT_SETUP([parsing numeric expression with type mismatch]) +AT_DATA([parse.sps], [dnl +DATA LIST NOTABLE/x 1. +COMPUTE x='foo'. +]) +AT_CHECK([pspp -O format=csv parse.sps], [1], [dnl +"parse.sps:2: error: COMPUTE: Type mismatch: expression has string type, but a numeric value is required here." ]) AT_CLEANUP @@ -59,6 +69,17 @@ AT_CHECK([pspp -O format=csv parse.sps], [1], [dnl ]) AT_CLEANUP +AT_SETUP([assigning string expression to new variable]) +AT_KEYWORDS([expression negative]) +AT_DATA([parse.sps], [dnl +DATA LIST NOTABLE/x 1(A). +COMPUTE y='a'. +]) +AT_CHECK([pspp -O format=csv parse.sps], [1], [dnl +"parse.sps:2: error: COMPUTE: This command tries to create a new variable y by assigning a string value to it, but this is not supported. Use the STRING command to create the new variable with the correct width before assigning to it, e.g. STRING y(A20)." +]) +AT_CLEANUP + AT_SETUP([parse expression with unknown system variable]) AT_KEYWORDS([expression negative]) AT_DATA([parse.sps], [dnl -- 2.30.2