From 0379abd58b39391a678df2add2bd896503b37a41 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 22 Jun 2019 08:46:38 +0200 Subject: [PATCH] MEANS: Fix behaviour when splits are active. --- src/language/stats/means-parser.c | 61 +------------- src/language/stats/means.c | 133 ++++++++++++++++++++++-------- src/language/stats/means.h | 9 +- tests/language/stats/means.at | 45 ++++++++++ 4 files changed, 148 insertions(+), 100 deletions(-) diff --git a/src/language/stats/means-parser.c b/src/language/stats/means-parser.c index 50e25d9c34..0c5916a452 100644 --- a/src/language/stats/means-parser.c +++ b/src/language/stats/means-parser.c @@ -24,13 +24,9 @@ #include "data/format.h" #include "data/variable.h" -#include "language/command.h" #include "language/lexer/lexer.h" #include "language/lexer/variable-parser.h" -#include "libpspp/hmap.h" -#include "libpspp/bt.h" -#include "libpspp/misc.h" #include "libpspp/pool.h" #include "means.h" @@ -89,7 +85,7 @@ lex_is_variable (struct lexer *lexer, const struct dictionary *dict, return true; } -static bool +bool means_parse (struct lexer *lexer, struct means *means) { /* Optional TABLES = */ @@ -248,58 +244,3 @@ means_parse (struct lexer *lexer, struct means *means) } return true; } - - -int -cmd_means (struct lexer *lexer, struct dataset *ds) -{ - struct means means; - means.pool = pool_create (); - - means.ctrl_exclude = MV_ANY; - means.dep_exclude = MV_ANY; - means.table = NULL; - means.n_tables = 0; - - means.dict = dataset_dict (ds); - - means.n_statistics = 3; - means.statistics = pool_calloc (means.pool, 3, sizeof *means.statistics); - means.statistics[0] = MEANS_MEAN; - means.statistics[1] = MEANS_N; - means.statistics[2] = MEANS_STDDEV; - - if (! 
means_parse (lexer, &means)) - goto error; - - { - struct casegrouper *grouper; - struct casereader *group; - bool ok; - - grouper = casegrouper_create_splits (proc_open (ds), means.dict); - while (casegrouper_get_next_group (grouper, &group)) - { - run_means (&means, group, ds); - } - ok = casegrouper_destroy (grouper); - ok = proc_commit (ds) && ok; - } - - for (int t = 0; t < means.n_tables; ++t) - { - const struct mtable *table = means.table + t; - - means_case_processing_summary (table); - means_shipout (table, &means); - } - destroy_means (&means); - pool_destroy (means.pool); - return CMD_SUCCESS; - - error: - - destroy_means (&means); - pool_destroy (means.pool); - return CMD_FAILURE; -} diff --git a/src/language/stats/means.c b/src/language/stats/means.c index 37fa8873ae..c30b66c310 100644 --- a/src/language/stats/means.c +++ b/src/language/stats/means.c @@ -27,6 +27,10 @@ #include "libpspp/hmap.h" #include "libpspp/bt.h" #include "libpspp/hash-functions.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" + +#include "language/command.h" #include "count-one-bits.h" #include "count-leading-zeros.h" @@ -116,7 +120,7 @@ destroy_workspace (const struct mtable *mt, struct workspace *ws) struct instance *inst; struct instance *next; HMAP_FOR_EACH_SAFE (inst, next, struct instance, hmap_node, - &instances->map) + &instances->map) { int width = var_get_width (inst->var); value_destroy (&inst->value, width); @@ -185,10 +189,10 @@ means_destroy_cells (const struct means *means, struct cell *cell, struct cell *sub_cell; struct cell *next; HMAP_FOR_EACH_SAFE (sub_cell, next, struct cell, hmap_node, - &container->map) - { - means_destroy_cells (means, sub_cell, table); - } + &container->map) + { + means_destroy_cells (means, sub_cell, table); + } } destroy_cell (means, table, cell); @@ -674,7 +678,7 @@ populate_case_processing_summary (struct pivot_category *pc, } /* Create the "Case Processing Summary" table. 
*/ -void +static void means_case_processing_summary (const struct mtable *mt) { struct pivot_table *pt = pivot_table_create (N_("Case Processing Summary")); @@ -811,7 +815,7 @@ means_shipout_multivar (const struct mtable *mt, const struct means *means, pivot_table_submit (pt); } -void +static void means_shipout (const struct mtable *mt, const struct means *means) { for (int cmb = 0; cmb < mt->n_combinations; ++cmb) @@ -955,13 +959,6 @@ prepare_means (struct means *cmd) { struct mtable *mt = cmd->table + t; - mt->n_combinations = 1; - for (int l = 0; l < mt->n_layers; ++l) - mt->n_combinations *= mt->layers[l]->n_factor_vars; - - mt->ws = xzalloc (mt->n_combinations * sizeof (*mt->ws)); - mt->summ = xzalloc (mt->n_combinations * mt->n_dep_vars - * sizeof (*mt->summ)); for (int i = 0; i < mt->n_combinations; ++i) { struct workspace *ws = mt->ws + i; @@ -1100,29 +1097,95 @@ run_means (struct means *cmd, struct casereader *input, post_means (cmd); } +struct lexer; -/* Release all resources allocated by this routine. - This does not include those allocated by the parser, - which exclusively use MEANS->pool. */ -void -destroy_means (struct means *means) +int +cmd_means (struct lexer *lexer, struct dataset *ds) { - for (int t = 0; t < means->n_tables; ++t) + struct means means; + means.pool = pool_create (); + + means.ctrl_exclude = MV_ANY; + means.dep_exclude = MV_ANY; + means.table = NULL; + means.n_tables = 0; + + means.dict = dataset_dict (ds); + + means.n_statistics = 3; + means.statistics = pool_calloc (means.pool, 3, sizeof *means.statistics); + means.statistics[0] = MEANS_MEAN; + means.statistics[1] = MEANS_N; + means.statistics[2] = MEANS_STDDEV; + + if (! means_parse (lexer, &means)) + goto error; + + /* Calculate some constant data for each table. 
*/ + for (int t = 0; t < means.n_tables; ++t) { - const struct mtable *table = means->table + t; - for (int i = 0; i < table->n_combinations; ++i) - { - struct workspace *ws = table->ws + i; - if (ws->root_cell == NULL) - continue; - means_destroy_cells (means, ws->root_cell, table); - } - for (int i = 0; i < table->n_combinations; ++i) - { - struct workspace *ws = table->ws + i; - destroy_workspace (table, ws); - } - free (table->ws); - free (table->summ); + struct mtable *mt = means.table + t; + mt->n_combinations = 1; + for (int l = 0; l < mt->n_layers; ++l) + mt->n_combinations *= mt->layers[l]->n_factor_vars; } + + { + struct casegrouper *grouper; + struct casereader *group; + bool ok; + + grouper = casegrouper_create_splits (proc_open (ds), means.dict); + while (casegrouper_get_next_group (grouper, &group)) + { + /* Allocate the workspaces. */ + for (int t = 0; t < means.n_tables; ++t) + { + struct mtable *mt = means.table + t; + mt->summ = xzalloc (mt->n_combinations * mt->n_dep_vars + * sizeof (*mt->summ)); + mt->ws = xzalloc (mt->n_combinations * sizeof (*mt->ws)); + } + run_means (&means, group, ds); + for (int t = 0; t < means.n_tables; ++t) + { + const struct mtable *mt = means.table + t; + + means_case_processing_summary (mt); + means_shipout (mt, &means); + + for (int i = 0; i < mt->n_combinations; ++i) + { + struct workspace *ws = mt->ws + i; + if (ws->root_cell == NULL) + continue; + + means_destroy_cells (&means, ws->root_cell, mt); + } + } + + /* Destroy the workspaces. 
*/ + for (int t = 0; t < means.n_tables; ++t) + { + struct mtable *mt = means.table + t; + free (mt->summ); + for (int i = 0; i < mt->n_combinations; ++i) + { + struct workspace *ws = mt->ws + i; + destroy_workspace (mt, ws); + } + free (mt->ws); + } + } + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + } + + pool_destroy (means.pool); + return CMD_SUCCESS; + + error: + + pool_destroy (means.pool); + return CMD_FAILURE; } diff --git a/src/language/stats/means.h b/src/language/stats/means.h index 9d93a4f07a..b2bdd1387f 100644 --- a/src/language/stats/means.h +++ b/src/language/stats/means.h @@ -17,6 +17,8 @@ #ifndef MEANS_H #define MEANS_H +#include "libpspp/hmap.h" +#include "libpspp/bt.h" #include "libpspp/compiler.h" struct cell_container @@ -152,12 +154,9 @@ struct dataset; struct casereader; void run_means (struct means *cmd, struct casereader *input, const struct dataset *ds UNUSED); -void means_shipout (const struct mtable *mt, const struct means *means); +struct lexer; +bool means_parse (struct lexer *lexer, struct means *means); -void means_case_processing_summary (const struct mtable *mt); - - -void destroy_means (struct means *means); #endif diff --git a/tests/language/stats/means.at b/tests/language/stats/means.at index c6e298b5af..c9aeeb7a0a 100644 --- a/tests/language/stats/means.at +++ b/tests/language/stats/means.at @@ -1044,4 +1044,49 @@ Total,17,22.00 AT_CLEANUP +dnl Make sure that behaviour with SPLIT is correct. +AT_SETUP([MEANS split]) +AT_KEYWORDS([categorical categoricals]) + +AT_DATA([means-split.sps], [dnl +data list notable list /b g *. +begin data +2 0 +2 0 +4 0 +4 0 +11 1 +11 1 +end data. + +split file by g. + +means b /cells = count mean. 
+]) + +AT_CHECK([pspp -O format=csv means-split.sps], [0], [dnl +Table: Case Processing Summary +,Cases,,,,, +,Included,,Excluded,,Total, +,N,Percent,N,Percent,N,Percent +b,4,100.0%,0,.0%,4,100.0% + +Table: Report +N,Mean +4,3.00 + +Table: Case Processing Summary +,Cases,,,,, +,Included,,Excluded,,Total, +,N,Percent,N,Percent,N,Percent +b,2,100.0%,0,.0%,2,100.0% + +Table: Report +N,Mean +2,11.00 +]) + +AT_CLEANUP + + -- 2.30.2