From e425c8f3444e1cb6ac5c9be967bcd204c2774c77 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sun, 5 May 2019 14:09:47 +0200 Subject: [PATCH] GRAPH (barchart): Percentages display subcategorial percentages. When calculating the values for a categorical barchart displaying percentages, use the sub-category totals as the denominator, rather than the primary totals. Fixes bug #56247 --- NEWS | 4 ++ src/language/stats/graph.c | 57 ++++++++++++++++++++++- tests/language/stats/graph.at | 88 +++++++++++++++++++++++++++++++++++ 3 files changed, 147 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index e745639b4c..5d51210d42 100644 --- a/NEWS +++ b/NEWS @@ -30,6 +30,10 @@ Changes from 1.2.0 to 1.3.0: * The GUI now has a sub-dialog for post-hoc tests of the oneway anova test. + * GRAPH: Categorical barcharts displaying percentages now show the + sub-category as a percentage of the primary category instead of the + total. + Changes from 1.0.1 to 1.2.0: * New experimental command SAVE DATA COLLECTION to save MDD files. diff --git a/src/language/stats/graph.c b/src/language/stats/graph.c index 2d60467845..93ecc72b65 100644 --- a/src/language/stats/graph.c +++ b/src/language/stats/graph.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc. + Copyright (C) 2012, 2013, 2015, 2019 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -416,6 +416,7 @@ run_barchart (struct graph *cmd, struct casereader *input) struct freq **cells = NULL; int n_cells = 0; + struct hmap columns = HMAP_INITIALIZER (columns); assert (cmd->n_by_vars <= 2); for (grouper = casegrouper_create_vars (input, cmd->by_var, cmd->n_by_vars); @@ -452,6 +453,28 @@ run_barchart (struct graph *cmd, struct casereader *input) if (ag_func[cmd->agr].pre) cells[n_cells - 1]->count = ag_func[cmd->agr].pre(); + if (cmd->n_by_vars > 1) + { + const union value *vv = case_data (c, cmd->by_var[1]); + const double weight = dict_get_case_weight (cmd->dict, c, NULL); + int v1_width = var_get_width (cmd->by_var[1]); + size_t hash = value_hash (vv, v1_width, 0); + + struct freq *fcol = NULL; + HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns) + if (value_equal (vv, &fcol->values[0], v1_width)) + break; + + if (fcol) + fcol->count += weight; + else + { + fcol = xzalloc (sizeof *fcol); + fcol->count = weight; + value_clone (&fcol->values[0], vv, v1_width); + hmap_insert (&columns, &fcol->node, hash); + } + } for (v = 0; v < cmd->n_by_vars; ++v) { @@ -486,9 +509,39 @@ run_barchart (struct graph *cmd, struct casereader *input) for (int i = 0; i < n_cells; ++i) { if (ag_func[cmd->agr].ppost) - cells[i]->count = ag_func[cmd->agr].ppost (cells[i]->count, ccc); + { + struct freq *cell = cells[i]; + if (cmd->n_by_vars > 1) + { + const union value *vv = &cell->values[1]; + + int v1_width = var_get_width (cmd->by_var[1]); + size_t hash = value_hash (vv, v1_width, 0); + + struct freq *fcol = NULL; + HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns) + if (value_equal (vv, &fcol->values[0], v1_width)) + break; + + cell->count = ag_func[cmd->agr].ppost (cell->count, fcol->count); + } + else + cell->count = ag_func[cmd->agr].ppost (cell->count, ccc); + } } + if (cmd->n_by_vars > 1) + { + 
struct freq *col_cell; + struct freq *next; + HMAP_FOR_EACH_SAFE (col_cell, next, struct freq, node, &columns) + { + + value_destroy (col_cell->values, var_get_width (cmd->by_var[1])); + free (col_cell); + } + } + hmap_destroy (&columns); { struct string label; diff --git a/tests/language/stats/graph.at b/tests/language/stats/graph.at index 6a8089c0a2..b61d8a522f 100644 --- a/tests/language/stats/graph.at +++ b/tests/language/stats/graph.at @@ -347,3 +347,91 @@ AT_CHECK([pspp -o pspp.pdf barchart.sps], [1], [ignore]) dnl Ignore output -- this is just a no-crash check. AT_CLEANUP + + +dnl Check that percentages are calculated with respect to the +dnl proper total. See bug #56247 +AT_SETUP([GRAPH barchart percentage sub-categorical]) +AT_DATA([barchart.sps], [dnl +data list list notable /penalty_favopp_x * XYdem_racethW8 * w *. +begin data. +1 0 1960 +1 1 376 +2 0 678 +2 1 147 +4 0 368 +4 1 164 +5 0 427 +5 1 274 +. . 1522 +end data. + +weight by w. + +* crosstabs +* /tables=penalty_favopp_x by XYdem_racethW8 +* /format=AVALUE TABLES PIVOT +* /statistics=CHISQ +* /cells COUNT COLUMN TOTAL. + +graph + /bar=pct by penalty_favopp_x + . + +graph + /bar=pct by penalty_favopp_x by XYdem_racethW8 + . 
+]) + +AT_CHECK([pspp --testing barchart.sps], [0], [dnl +Graphic: Barchart +Percentage: 0 +Total Categories: 4 +Primary Categories: 4 +Largest Category: 53.1634 +Total Count: 100 +Y Label: "Percentage" +Categorical Variables: + Var: "penalty_favopp_x" +Categories: + 0 " 1.00" + 2 " 4.00" + 3 " 5.00" + 1 " 2.00" +All Categories: +Count: 53.1634; Cat: " 1.00" +Count: 18.7756; Cat: " 2.00" +Count: 12.1074; Cat: " 4.00" +Count: 15.9536; Cat: " 5.00" + +Graphic: Barchart +Percentage: 0 +Total Categories: 8 +Primary Categories: 4 +Largest Category: 57.0929 +Total Count: 200 +Y Label: "Percentage" +Categorical Variables: + Var: "penalty_favopp_x" + Var: "XYdem_racethW8" +Categories: + 0 " 1.00" + 2 " 4.00" + 3 " 5.00" + 1 " 2.00" +Sub-categories: + 0 " .00" + 1 " 1.00" +All Categories: +Count: 57.0929; Cat: " 1.00", " .00" +Count: 39.1259; Cat: " 1.00", " 1.00" +Count: 19.7495; Cat: " 2.00", " .00" +Count: 15.2966; Cat: " 2.00", " 1.00" +Count: 10.7195; Cat: " 4.00", " .00" +Count: 17.0656; Cat: " 4.00", " 1.00" +Count: 12.4381; Cat: " 5.00", " .00" +Count: 28.512; Cat: " 5.00", " 1.00" + +]) + +AT_CLEANUP -- 2.30.2