From 578bc59e7ec144bc8424e9a58807a791314cc0e8 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 17 Sep 2022 09:43:42 -0700 Subject: [PATCH] MISSING VALUES: Improve error messages. --- src/data/missing-values.h | 2 + src/language/dictionary/missing-values.c | 119 ++++++++------------ tests/language/dictionary/missing-values.at | 38 ++++++- 3 files changed, 85 insertions(+), 74 deletions(-) diff --git a/src/data/missing-values.h b/src/data/missing-values.h index 8123beeda1..ec5a843e64 100644 --- a/src/data/missing-values.h +++ b/src/data/missing-values.h @@ -51,6 +51,8 @@ struct missing_values union value values[3]; /* Missing values. [1], [2] are the range. */ }; +#define MV_INIT_EMPTY_NUMERIC { .type = 0 } + /* Classes of missing values. These are useful as individual values and as masks, and they are used both diff --git a/src/language/dictionary/missing-values.c b/src/language/dictionary/missing-values.c index 683a6d6c91..187ddee316 100644 --- a/src/language/dictionary/missing-values.c +++ b/src/language/dictionary/missing-values.c @@ -41,14 +41,12 @@ int cmd_missing_values (struct lexer *lexer, struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); - struct variable **v = NULL; - size_t nv; - - bool ok = true; while (lex_token (lexer) != T_ENDCMD) { - size_t i; + struct missing_values mv = MV_INIT_EMPTY_NUMERIC; + struct variable **v = NULL; + size_t nv; if (!parse_variables (lexer, dict, &v, &nv, PV_NONE)) goto error; @@ -56,36 +54,19 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) if (!lex_force_match (lexer, T_LPAREN)) goto error; - for (i = 0; i < nv; i++) - var_clear_missing_values (v[i]); - - int start_ofs = lex_ofs (lexer); - int end_ofs; - for (end_ofs = start_ofs; ; end_ofs++) + int values_start = lex_ofs (lexer); + int values_end; + for (values_end = values_start; ; values_end++) { - enum token_type next = lex_ofs_token (lexer, end_ofs + 1)->type; + enum token_type next = lex_ofs_token (lexer, values_end + 1)->type; if (next == T_RPAREN || next == T_ENDCMD || next == T_STOP) break; } if (!lex_match (lexer, T_RPAREN)) { - struct missing_values mv; - - for (i = 0; i < nv; i++) - if (var_get_type (v[i]) != var_get_type (v[0])) - { - const struct variable *n = var_is_numeric (v[0]) ? v[0] : v[i]; - const struct variable *s = var_is_numeric (v[0]) ? v[i] : v[0]; - msg (SE, _("Cannot mix numeric variables (e.g. %s) and " - "string variables (e.g. %s) within a single list."), - var_get_name (n), var_get_name (s)); - goto error; - } - if (var_is_numeric (v[0])) { - mv_init (&mv, 0); while (!lex_match (lexer, T_RPAREN)) { enum fmt_type type = var_get_print_format (v[0])->type; @@ -98,11 +79,11 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) ? mv_add_num (&mv, x) : mv_add_range (&mv, x, y))) { - lex_ofs_error (lexer, start_ofs, end_ofs, + lex_ofs_error (lexer, values_start, values_end, _("Too many numeric missing values. At " "most three individual values or one " "value and one range are allowed.")); - ok = false; + goto error; } lex_match (lexer, T_COMMA); @@ -115,75 +96,75 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) mv_init (&mv, MV_MAX_STRING); while (!lex_match (lexer, T_RPAREN)) { - const char *utf8_s; - size_t utf8_trunc_len; - size_t utf8_len; - - char *raw_s; - if (!lex_force_string (lexer)) - { - ok = false; - break; - } + goto error; /* Truncate the string to fit in 8 bytes in the dictionary encoding. */ - utf8_s = lex_tokcstr (lexer); - utf8_len = ss_length (lex_tokss (lexer)); - utf8_trunc_len = utf8_encoding_trunc_len (utf8_s, encoding, - MV_MAX_STRING); + const char *utf8_s = lex_tokcstr (lexer); + size_t utf8_len = ss_length (lex_tokss (lexer)); + size_t utf8_trunc_len = utf8_encoding_trunc_len ( + utf8_s, encoding, MV_MAX_STRING); if (utf8_trunc_len < utf8_len) lex_error (lexer, _("Truncating missing value to maximum " "acceptable length (%d bytes)."), MV_MAX_STRING); /* Recode to dictionary encoding and add. */ - raw_s = recode_string (encoding, "UTF-8", - utf8_s, utf8_trunc_len); - if (!mv_add_str (&mv, CHAR_CAST (const uint8_t *, raw_s), - strlen (raw_s))) + char *raw_s = recode_string (encoding, "UTF-8", + utf8_s, utf8_trunc_len); + bool ok = mv_add_str (&mv, CHAR_CAST (const uint8_t *, raw_s), + strlen (raw_s)); + free (raw_s); + if (!ok) { - lex_ofs_error (lexer, start_ofs, end_ofs, + lex_ofs_error (lexer, values_start, values_end, _("Too many string missing values. " "At most three individual values " "are allowed.")); - ok = false; + goto error; } - free (raw_s); lex_get (lexer); lex_match (lexer, T_COMMA); } } + } + lex_match (lexer, T_SLASH); + + bool ok = true; + for (size_t i = 0; i < nv; i++) + { + int var_width = var_get_width (v[i]); - for (i = 0; i < nv; i++) + if (mv_is_resizable (&mv, var_width)) + var_set_missing_values (v[i], &mv); + else { - if (mv_is_resizable (&mv, var_get_width (v[i]))) - var_set_missing_values (v[i], &mv); + ok = false; + if (!var_width) + lex_ofs_error (lexer, values_start, values_end, + _("Cannot assign string missing values to " + "numeric variable %s."), var_get_name (v[i])); else - { - lex_ofs_error (lexer, start_ofs, end_ofs, - _("Missing values are too long to assign " - "to variable %s with width %d."), - var_get_name (v[i]), var_get_width (v[i])); - ok = false; - } + lex_ofs_error (lexer, values_start, values_end, + _("Missing values are too long to assign " + "to variable %s with width %d."), + var_get_name (v[i]), var_get_width (v[i])); } - - mv_destroy (&mv); } + mv_destroy (&mv); + free (v); + if (!ok) + return CMD_FAILURE; + continue; - lex_match (lexer, T_SLASH); + error: + mv_destroy (&mv); free (v); - v = NULL; + return CMD_FAILURE; } - free (v); - return ok ? CMD_SUCCESS : CMD_FAILURE; - -error: - free (v); - return CMD_FAILURE; + return CMD_SUCCESS; } diff --git a/tests/language/dictionary/missing-values.at b/tests/language/dictionary/missing-values.at index a7732c7351..541a4cab45 100644 --- a/tests/language/dictionary/missing-values.at +++ b/tests/language/dictionary/missing-values.at @@ -183,11 +183,9 @@ AT_CHECK([pspp -O format=csv missing-values.sps], [1], [dnl 11 | MISSING VALUES str1 ('a' THRU 'z'). | ^~~~" -"missing-values.sps:11.26-11.29: error: MISSING VALUES: THRU is not a variable name. - 11 | MISSING VALUES str1 ('a' THRU 'z'). - | ^~~~" - -missing-values.sps:14: error: MISSING VALUES: Cannot mix numeric variables (e.g. num1) and string variables (e.g. str1) within a single list. +"missing-values.sps:14.27-14.31: error: MISSING VALUES: Cannot assign string missing values to numeric variable num1. + 14 | MISSING VALUES str1 num1 ('123'). + | ^~~~~" "missing-values.sps:17.22-17.31: error: MISSING VALUES: Too many numeric missing values. At most three individual values or one value and one range are allowed. 17 | MISSING VALUES num1 (1, 2, 3, 4). @@ -210,3 +208,33 @@ missing-values.sps:14: error: MISSING VALUES: Cannot mix numeric variables (e.g. | ^~~~~~~~" ]) AT_CLEANUP + +AT_SETUP([MISSING VALUES syntax errors]) +AT_DATA([missing-values.sps], [dnl +DATA LIST LIST NOTABLE/n1 to n10 (F8.2) s1 to s10 (A8). +MISSING VALUES **. +MISSING VALUES n1 **. +MISSING VALUES s1 (1). +MISSING VALUES n1 (1**). +]) +AT_DATA([insert.sps], [dnl +INSERT FILE='missing-values.sps' ERROR=IGNORE. +]) +AT_CHECK([pspp --testing-mode -O format=csv insert.sps], [1], [dnl +"missing-values.sps:2.16-2.17: error: MISSING VALUES: Syntax error expecting variable name. + 2 | MISSING VALUES **. + | ^~" + +"missing-values.sps:3.19-3.20: error: MISSING VALUES: Syntax error expecting `@{:@'. + 3 | MISSING VALUES n1 **. + | ^~" + +"missing-values.sps:4.20: error: MISSING VALUES: Syntax error expecting string. + 4 | MISSING VALUES s1 (1). + | ^" + +"missing-values.sps:5.21-5.22: error: MISSING VALUES: Syntax error expecting number. + 5 | MISSING VALUES n1 (1**). + | ^~" +]) +AT_CLEANUP \ No newline at end of file -- 2.30.2