From: Ben Pfaff Date: Mon, 21 Nov 2022 06:25:33 +0000 (-0800) Subject: MEANS: Improve error messages and coding style. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=09e70bd965a78b9831d51e93270d24932ed00b5b;p=pspp MEANS: Improve error messages and coding style. --- diff --git a/src/language/stats/means-parser.c b/src/language/stats/means-parser.c index 6bbae89385..0cc0b1b317 100644 --- a/src/language/stats/means-parser.c +++ b/src/language/stats/means-parser.c @@ -73,52 +73,67 @@ static bool lex_is_variable (struct lexer *lexer, const struct dictionary *dict, int n) { - const char *tstr; - if (lex_next_token (lexer, n) != T_ID) + if (lex_next_token (lexer, n) != T_ID) return false; - tstr = lex_next_tokcstr (lexer, n); + const char *tstr = lex_next_tokcstr (lexer, n); + return dict_lookup_var (dict, tstr) != NULL; +} - if (NULL == dict_lookup_var (dict, tstr)) - return false; +static const struct cell_spec * +match_cell (struct lexer *lexer) +{ + for (size_t i = 0; i < n_MEANS_STATISTICS; ++i) + { + const struct cell_spec *cs = &cell_spec[i]; + if (lex_match_id (lexer, cs->keyword)) + return cs; + } + return NULL; +} - return true; +static void +add_statistic (struct means *means, int statistic) +{ + if (means->n_statistics >= means->allocated_statistics) + means->statistics = pool_2nrealloc (means->pool, means->statistics, + &means->allocated_statistics, + sizeof *means->statistics); + means->statistics[means->n_statistics++] = statistic; +} + +void +means_set_default_statistics (struct means *means) +{ + means->n_statistics = 0; + add_statistic (means, MEANS_MEAN); + add_statistic (means, MEANS_N); + add_statistic (means, MEANS_STDDEV); } bool means_parse (struct lexer *lexer, struct means *means) { - /* Optional TABLES = */ - if (lex_match_id (lexer, "TABLES")) - { - if (! lex_force_match (lexer, T_EQUALS)) - return false; - } + /* Optional TABLES=. */ + if (lex_match_id (lexer, "TABLES") && !lex_force_match (lexer, T_EQUALS)) + return false; - bool more_tables = true; /* Parse the "tables" */ - while (more_tables) + for (;;) { means->table = pool_realloc (means->pool, means->table, - (means->n_tables + 1) * sizeof (*means->table)); + (means->n_tables + 1) * sizeof *means->table); - if (! parse_means_table_syntax (lexer, means, - &means->table[means->n_tables])) - { - return false; - } - means->n_tables ++; + if (!parse_means_table_syntax (lexer, means, + &means->table[means->n_tables])) + return false; + means->n_tables++; /* Look ahead to see if there are more tables to be parsed */ - more_tables = false; - if (T_SLASH == lex_next_token (lexer, 0)) - { - if (lex_is_variable (lexer, means->dict, 1)) - { - more_tables = true; - lex_match (lexer, T_SLASH); - } - } + if (lex_next_token (lexer, 0) != T_SLASH + || !lex_is_variable (lexer, means->dict, 1)) + break; + lex_match (lexer, T_SLASH); } /* /MISSING subcommand */ @@ -128,43 +143,36 @@ means_parse (struct lexer *lexer, struct means *means) if (lex_match_id (lexer, "MISSING")) { - /* - If no MISSING subcommand is specified, each combination of - a dependent variable and categorical variables is handled - separately. - */ + /* If no MISSING subcommand is specified, each combination of a + dependent variable and categorical variables is handled + separately. */ lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "INCLUDE")) { - /* - Use the subcommand "/MISSING=INCLUDE" to include user-missing - values in the analysis. - */ + /* Use the subcommand "/MISSING=INCLUDE" to include user-missing + values in the analysis. */ means->ctrl_exclude = MV_SYSTEM; means->dep_exclude = MV_SYSTEM; } else if (lex_match_id (lexer, "DEPENDENT")) - /* - Use the command "/MISSING=DEPENDENT" to - include user-missing values for the categorical variables, - while excluding them for the dependent variables. - - Cases are dropped only when user-missing values - appear in dependent variables. User-missing - values for categorical variables are treated according to - their face value. - - Cases are ALWAYS dropped when System Missing values appear - in the categorical variables. - */ + /* Use the command "/MISSING=DEPENDENT" to include user-missing + values for the categorical variables, while excluding them for + the dependent variables. + + Cases are dropped only when user-missing values appear in + dependent variables. User-missing values for categorical + variables are treated according to their face value. + + Cases are ALWAYS dropped when System Missing values appear in + the categorical variables. */ { means->dep_exclude = MV_ANY; means->ctrl_exclude = MV_SYSTEM; } else { - lex_error (lexer, NULL); + lex_error_expecting (lexer, "INCLUDE", "DEPENDENT"); return false; } } @@ -174,63 +182,30 @@ means_parse (struct lexer *lexer, struct means *means) /* The default values become overwritten */ means->n_statistics = 0; - pool_free (means->pool, means->statistics); - means->statistics = 0; - while (lex_token (lexer) != T_ENDCMD - && lex_token (lexer) != T_SLASH) + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { if (lex_match (lexer, T_ALL)) { - pool_free (means->pool, means->statistics); - means->statistics = pool_calloc (means->pool, - n_MEANS_STATISTICS, - sizeof (*means->statistics)); - means->n_statistics = n_MEANS_STATISTICS; - int i; - for (i = 0; i < n_MEANS_STATISTICS; ++i) - { - means->statistics[i] = i; - } + means->n_statistics = 0; + for (int i = 0; i < n_MEANS_STATISTICS; ++i) + add_statistic (means, i); } else if (lex_match_id (lexer, "NONE")) - { - means->n_statistics = 0; - pool_free (means->pool, means->statistics); - means->statistics = 0; - } + means->n_statistics = 0; else if (lex_match_id (lexer, "DEFAULT")) + means_set_default_statistics (means); + else { - pool_free (means->pool, means->statistics); - means->statistics = pool_calloc (means->pool, - 3, - sizeof *means->statistics); - means->statistics[0] = MEANS_MEAN; - means->statistics[1] = MEANS_N; - means->statistics[2] = MEANS_STDDEV; - } - else - { - int i; - for (i = 0; i < n_MEANS_STATISTICS; ++i) - { - const struct cell_spec *cs = cell_spec + i; - if (lex_match_id (lexer, cs->keyword)) - { - means->statistics - = pool_realloc (means->pool, - means->statistics, - (means->n_statistics + 1) - * sizeof (*means->statistics)); - - means->statistics[means->n_statistics] = i; - means->n_statistics++; - break; - } - } - - if (i >= n_MEANS_STATISTICS) + const struct cell_spec *cs = match_cell (lexer); + if (cs) + add_statistic (means, cs - cell_spec); + else { - lex_error (lexer, NULL); + const char *keywords[n_MEANS_STATISTICS]; + for (int i = 0; i < n_MEANS_STATISTICS; ++i) + keywords[i] = cell_spec[i].keyword; + lex_error_expecting_array (lexer, keywords, + n_MEANS_STATISTICS); return false; } } @@ -238,7 +213,7 @@ means_parse (struct lexer *lexer, struct means *means) } else { - lex_error (lexer, NULL); + lex_error_expecting (lexer, "MISSING", "CELLS"); return false; } } diff --git a/src/language/stats/means.c b/src/language/stats/means.c index 4ebd60d83a..c1337ad949 100644 --- a/src/language/stats/means.c +++ b/src/language/stats/means.c @@ -31,6 +31,7 @@ #include "libpspp/pool.h" #include "language/command.h" +#include "language/lexer/lexer.h" #include "count-one-bits.h" #include "count-leading-zeros.h" @@ -234,7 +235,7 @@ dump_cell (const struct cell *cell, const struct mtable *mt, int level) static void dump_indeces (const size_t *indexes, int n) { - for (int i = 0 ; i < n; ++i) + for (int i = 0; i < n; ++i) { printf ("%ld; ", indexes[i]); } @@ -601,7 +602,7 @@ create_table_structure (const struct mtable *mt, struct pivot_table *pt, int * lindexes = ws->control_idx; /* The inner layers are situated rightmost in the table. So this iteration is in reverse order. */ - for (int l = mt->n_layers -1; l >=0 ; --l) + for (int l = mt->n_layers - 1; l >= 0; --l) { const struct layer *layer = mt->layers[l]; const struct cell_container *instances = ws->instances + l; @@ -1102,28 +1103,18 @@ run_means (struct means *cmd, struct casereader *input, post_means (cmd); } -struct lexer; - int cmd_means (struct lexer *lexer, struct dataset *ds) { - struct means means; - means.pool = pool_create (); - - means.ctrl_exclude = MV_ANY; - means.dep_exclude = MV_ANY; - means.table = NULL; - means.n_tables = 0; - - means.dict = dataset_dict (ds); - - means.n_statistics = 3; - means.statistics = pool_calloc (means.pool, 3, sizeof *means.statistics); - means.statistics[0] = MEANS_MEAN; - means.statistics[1] = MEANS_N; - means.statistics[2] = MEANS_STDDEV; - - if (! means_parse (lexer, &means)) + struct means means = { + .pool = pool_create (), + .ctrl_exclude = MV_ANY, + .dep_exclude = MV_ANY, + .dict = dataset_dict (ds), + }; + means_set_default_statistics (&means); + + if (!means_parse (lexer, &means)) goto error; /* Calculate some constant data for each table. */ @@ -1135,62 +1126,58 @@ cmd_means (struct lexer *lexer, struct dataset *ds) mt->n_combinations *= mt->layers[l]->n_factor_vars; } - { - struct casegrouper *grouper; - struct casereader *group; - bool ok; - - grouper = casegrouper_create_splits (proc_open (ds), means.dict); - while (casegrouper_get_next_group (grouper, &group)) - { - /* Allocate the workspaces. */ - for (int t = 0; t < means.n_tables; ++t) + struct casegrouper *grouper + = casegrouper_create_splits (proc_open (ds), means.dict); + struct casereader *group; + while (casegrouper_get_next_group (grouper, &group)) + { + /* Allocate the workspaces. */ + for (int t = 0; t < means.n_tables; ++t) { struct mtable *mt = means.table + t; mt->summ = xcalloc (mt->n_combinations * mt->n_dep_vars, - sizeof (*mt->summ)); - mt->ws = xcalloc (mt->n_combinations, sizeof (*mt->ws)); + sizeof *mt->summ); + mt->ws = xcalloc (mt->n_combinations, sizeof *mt->ws); } - run_means (&means, group, ds); - for (int t = 0; t < means.n_tables; ++t) - { - const struct mtable *mt = means.table + t; + run_means (&means, group, ds); + for (int t = 0; t < means.n_tables; ++t) + { + const struct mtable *mt = means.table + t; - means_case_processing_summary (mt); - means_shipout (mt, &means); + means_case_processing_summary (mt); + means_shipout (mt, &means); - for (int i = 0; i < mt->n_combinations; ++i) - { - struct workspace *ws = mt->ws + i; - if (ws->root_cell == NULL) - continue; + for (int i = 0; i < mt->n_combinations; ++i) + { + struct workspace *ws = mt->ws + i; + if (ws->root_cell) + means_destroy_cells (&means, ws->root_cell, mt); + } + } - means_destroy_cells (&means, ws->root_cell, mt); - } - } + /* Destroy the workspaces. */ + for (int t = 0; t < means.n_tables; ++t) + { + struct mtable *mt = means.table + t; + free (mt->summ); + for (int i = 0; i < mt->n_combinations; ++i) + { + struct workspace *ws = mt->ws + i; + destroy_workspace (mt, ws); + } + free (mt->ws); + } + } - /* Destroy the workspaces. */ - for (int t = 0; t < means.n_tables; ++t) - { - struct mtable *mt = means.table + t; - free (mt->summ); - for (int i = 0; i < mt->n_combinations; ++i) - { - struct workspace *ws = mt->ws + i; - destroy_workspace (mt, ws); - } - free (mt->ws); - } - } - ok = casegrouper_destroy (grouper); - ok = proc_commit (ds) && ok; - } + bool ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + if (!ok) + goto error; pool_destroy (means.pool); return CMD_SUCCESS; error: - pool_destroy (means.pool); return CMD_FAILURE; } diff --git a/src/language/stats/means.h b/src/language/stats/means.h index b2bdd1387f..1ce86868e1 100644 --- a/src/language/stats/means.h +++ b/src/language/stats/means.h @@ -21,6 +21,10 @@ #include "libpspp/bt.h" #include "libpspp/compiler.h" +struct casereader; +struct dataset; +struct lexer; + struct cell_container { /* A hash table containing the cells. The table is indexed by a hash @@ -127,6 +131,7 @@ struct means /* The statistics to be calculated for each cell. */ int *statistics; int n_statistics; + size_t allocated_statistics; /* Pool on which cell functions may allocate data. */ struct pool *pool; @@ -148,15 +153,8 @@ enum MEANS_STDDEV }; - - -struct dataset; -struct casereader; -void run_means (struct means *cmd, struct casereader *input, const struct dataset *ds UNUSED); - -struct lexer; -bool means_parse (struct lexer *lexer, struct means *means); - - +void run_means (struct means *, struct casereader *, const struct dataset *); +bool means_parse (struct lexer *, struct means *); +void means_set_default_statistics (struct means *); #endif diff --git a/tests/language/stats/means.at b/tests/language/stats/means.at index 72f98b3d1f..f25fe04aec 100644 --- a/tests/language/stats/means.at +++ b/tests/language/stats/means.at @@ -1120,5 +1120,34 @@ Range,02:01:00,$1.14 AT_CLEANUP - - +AT_SETUP([MEANS syntax errors]) +AT_DATA([means.sps], [dnl +DATA LIST LIST NOTABLE/x y z. +MEANS TABLES **. +MEANS x BY **. +MEANS x/MISSING=**. +MEANS x/CELLS=**. +MEANS x/ **. +]) +AT_CHECK([pspp -O format=csv means.sps], [1], [dnl +"means.sps:2.14-2.15: error: MEANS: Syntax error expecting `='. + 2 | MEANS TABLES **. + | ^~" + +"means.sps:3.12-3.13: error: MEANS: Syntax error expecting variable name. + 3 | MEANS x BY **. + | ^~" + +"means.sps:4.17-4.18: error: MEANS: Syntax error expecting INCLUDE or DEPENDENT. + 4 | MEANS x/MISSING=**. + | ^~" + +"means.sps:5.15-5.16: error: MEANS: Syntax error expecting one of the following: MEAN, COUNT, STDDEV, SEMEAN, SUM, MIN, MAX, RANGE, VARIANCE, KURT, SEKURT, SKEW, SESKEW, FIRST, LAST, HARMONIC, GEOMETRIC. + 5 | MEANS x/CELLS=**. + | ^~" + +"means.sps:6.10-6.11: error: MEANS: Syntax error expecting MISSING or CELLS. + 6 | MEANS x/ **. + | ^~" +]) +AT_CLEANUP \ No newline at end of file