X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fgraph.c;h=8157bb295887c762f2362005b6e8f4bfcb474dcf;hb=8a0397328b6230fd49724e1c6d91a5a545d2fb4b;hp=8af6a1515fefbdf1f2c363187d8f6fbf501b0ccb;hpb=612b51515e356bc4dd625a3fb18d0a4f827a1e2c;p=pspp diff --git a/src/language/stats/graph.c b/src/language/stats/graph.c index 8af6a1515f..8157bb2958 100644 --- a/src/language/stats/graph.c +++ b/src/language/stats/graph.c @@ -1,7 +1,7 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc. - + Copyright (C) 2012, 2013, 2015, 2019 Free Software Foundation, Inc. + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -11,7 +11,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see . */ @@ -58,8 +58,6 @@ #include "language/stats/freq.h" #include "language/stats/chart-category.h" -#include "output/tab.h" - #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid @@ -142,10 +140,10 @@ struct graph enum bar_type bar_type; const struct variable *by_var[2]; size_t n_by_vars; - + struct subcase ordering; /* Ordering for aggregation */ int agr; /* Index into ag_func */ - + /* A caseproto that contains the plot data */ struct caseproto *gr_proto; }; @@ -207,7 +205,8 @@ const struct ag_func ag_func[] = {"COUNT", N_("Count"), 0, 0, NULL, calc_mom0, 0, 0}, {"PCT", N_("Percentage"), 0, 0, NULL, calc_mom0, 0, post_percentage}, {"CUFREQ", N_("Cumulative Count"), 0, 1, NULL, calc_mom0, 0, 0}, - {"CUPCT", N_("Cumulative Percent"), 0, 1, NULL, calc_mom0, 0, post_percentage}, + {"CUPCT", N_("Cumulative Percent"), 0, 1, NULL, calc_mom0, 0, + post_percentage}, {"MEAN", N_("Mean"), 1, 0, NULL, calc_mom1, post_normalise, 0}, {"SUM", N_("Sum"), 1, 0, NULL, calc_mom1, 0, 0}, @@ -241,10 +240,12 @@ parse_function (struct lexer *lexer, struct graph *graph) if (!lex_force_match (lexer, T_LPAREN)) goto error; - graph->dep_vars = xzalloc (sizeof (graph->dep_vars) * graph->n_dep_vars); + graph->dep_vars = xcalloc (graph->n_dep_vars, sizeof (graph->dep_vars)); for (v = 0; v < ag_func[i].arity; ++v) { graph->dep_vars[v] = parse_variable (lexer, graph->dict); + if (! graph->dep_vars[v]) + goto error; } if (!lex_force_match (lexer, T_RPAREN)) @@ -274,7 +275,7 @@ parse_function (struct lexer *lexer, struct graph *graph) } return true; - + error: lex_error (lexer, NULL); return false; @@ -307,7 +308,8 @@ show_scatterplot (const struct graph *cmd, struct casereader *input) scatterplot = scatterplot_create (input, var_to_string(cmd->dep_vars[0]), var_to_string(cmd->dep_vars[1]), - (cmd->n_by_vars > 0) ? cmd->by_var[0] : NULL, + (cmd->n_by_vars > 0) ? cmd->by_var[0] + : NULL, &byvar_overflow, ds_cstr (&title), cmd->es[0].minimum, cmd->es[0].maximum, @@ -329,6 +331,12 @@ show_histogr (const struct graph *cmd, struct casereader *input) struct histogram *histogram; struct ccase *c; + if (cmd->es[0].cc <= 0) + { + casereader_destroy (input); + return; + } + { /* Sturges Rule */ double bin_width = fabs (cmd->es[0].minimum - cmd->es[0].maximum) @@ -347,8 +355,8 @@ show_histogr (const struct graph *cmd, struct casereader *input) for (;(c = casereader_read (input)) != NULL; case_unref (c)) { - const double x = case_data_idx (c, HG_IDX_X)->f; - const double weight = case_data_idx (c, HG_IDX_WT)->f; + const double x = case_num_idx (c, HG_IDX_X); + const double weight = case_num_idx (c, HG_IDX_WT); moments_pass_two (cmd->es[0].mom, x, weight); histogram_add (histogram, x, weight); } @@ -360,24 +368,24 @@ show_histogr (const struct graph *cmd, struct casereader *input) struct string label; - ds_init_cstr (&label, + ds_init_cstr (&label, var_to_string (cmd->dep_vars[0])); moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, NULL); - chart_item_submit - ( histogram_chart_create (histogram->gsl_hist, + chart_submit + (histogram_chart_create (histogram->gsl_hist, ds_cstr (&label), n, mean, sqrt (var), cmd->normal)); - statistic_destroy (&histogram->parent); + statistic_destroy (&histogram->parent); ds_destroy (&label); } } static void cleanup_exploratory_stats (struct graph *cmd) -{ +{ int v; for (v = 0; v < cmd->n_dep_vars; ++v) @@ -394,7 +402,7 @@ run_barchart (struct graph *cmd, struct casereader *input) struct casereader *group; double ccc = 0.0; - if ( cmd->missing_pw == false) + if (cmd->missing_pw == false) input = casereader_create_filter_missing (input, cmd->dep_vars, cmd->n_dep_vars, @@ -405,9 +413,11 @@ run_barchart (struct graph *cmd, struct casereader *input) input = sort_execute (input, &cmd->ordering); - struct freq **freqs = NULL; - int n_freqs = 0; + struct freq **cells = NULL; + int n_cells = 0; + struct hmap columns = HMAP_INITIALIZER (columns); + assert (cmd->n_by_vars <= 2); for (grouper = casegrouper_create_vars (input, cmd->by_var, cmd->n_by_vars); casegrouper_get_next_group (grouper, &group); @@ -419,7 +429,9 @@ run_barchart (struct graph *cmd, struct casereader *input) /* Deal with missing values in the categorical variables */ for (v = 0; v < cmd->n_by_vars; ++v) { - if (var_is_value_missing (cmd->by_var[v], case_data (c, cmd->by_var[v]), cmd->fctr_excl) ) + if (var_is_value_missing (cmd->by_var[v], + case_data (c, cmd->by_var[v])) + & cmd->fctr_excl) break; } @@ -429,23 +441,46 @@ run_barchart (struct graph *cmd, struct casereader *input) continue; } - freqs = xrealloc (freqs, sizeof (*freqs) * ++n_freqs); - freqs[n_freqs - 1] = xzalloc (sizeof (**freqs) + - sizeof (union value) * (cmd->n_by_vars - 1) ); + cells = xrealloc (cells, sizeof (*cells) * ++n_cells); + cells[n_cells - 1] = xzalloc (sizeof (**cells) + + sizeof (union value) + * (cmd->n_by_vars - 1)); - if (ag_func[cmd->agr].cumulative && n_freqs >= 2) - freqs[n_freqs - 1]->count = freqs[n_freqs - 2]->count; + if (ag_func[cmd->agr].cumulative && n_cells >= 2) + cells[n_cells - 1]->count = cells[n_cells - 2]->count; else - freqs[n_freqs - 1]->count = 0; + cells[n_cells - 1]->count = 0; if (ag_func[cmd->agr].pre) - freqs[n_freqs - 1]->count = ag_func[cmd->agr].pre(); + cells[n_cells - 1]->count = ag_func[cmd->agr].pre(); + + if (cmd->n_by_vars > 1) + { + const union value *vv = case_data (c, cmd->by_var[1]); + const double weight = dict_get_case_weight (cmd->dict, c, NULL); + int v1_width = var_get_width (cmd->by_var[1]); + size_t hash = value_hash (vv, v1_width, 0); + + struct freq *fcol = NULL; + HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns) + if (value_equal (vv, &fcol->values[0], v1_width)) + break; + if (fcol) + fcol->count += weight; + else + { + fcol = xzalloc (sizeof *fcol); + fcol->count = weight; + value_clone (&fcol->values[0], vv, v1_width); + hmap_insert (&columns, &fcol->node, hash); + } + } for (v = 0; v < cmd->n_by_vars; ++v) { - value_clone (&freqs[n_freqs - 1]->values[v], case_data (c, cmd->by_var[v]), - var_get_width (cmd->by_var[v]) - ); + value_clone (&cells[n_cells - 1]->values[v], + case_data (c, cmd->by_var[v]), + var_get_width (cmd->by_var[v])); } case_unref (c); @@ -453,29 +488,60 @@ run_barchart (struct graph *cmd, struct casereader *input) for (;(c = casereader_read (group)) != NULL; case_unref (c)) { const double weight = dict_get_case_weight (cmd->dict,c,NULL); - const double x = (cmd->n_dep_vars > 0) ? case_data (c, cmd->dep_vars[0])->f : SYSMIS; + const double x = (cmd->n_dep_vars > 0) + ? case_num (c, cmd->dep_vars[0]) : SYSMIS; cc += weight; - - freqs[n_freqs - 1]->count - = ag_func[cmd->agr].calc (freqs[n_freqs - 1]->count, x, weight); + + cells[n_cells - 1]->count + = ag_func[cmd->agr].calc (cells[n_cells - 1]->count, x, weight); } if (ag_func[cmd->agr].post) - freqs[n_freqs - 1]->count - = ag_func[cmd->agr].post (freqs[n_freqs - 1]->count, cc); + cells[n_cells - 1]->count + = ag_func[cmd->agr].post (cells[n_cells - 1]->count, cc); ccc += cc; } casegrouper_destroy (grouper); - for (int i = 0; i < n_freqs; ++i) + for (int i = 0; i < n_cells; ++i) { if (ag_func[cmd->agr].ppost) - freqs[i]->count = ag_func[cmd->agr].ppost (freqs[i]->count, ccc); + { + struct freq *cell = cells[i]; + if (cmd->n_by_vars > 1) + { + const union value *vv = &cell->values[1]; + + int v1_width = var_get_width (cmd->by_var[1]); + size_t hash = value_hash (vv, v1_width, 0); + + struct freq *fcol = NULL; + HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns) + if (value_equal (vv, &fcol->values[0], v1_width)) + break; + + cell->count = ag_func[cmd->agr].ppost (cell->count, fcol->count); + } + else + cell->count = ag_func[cmd->agr].ppost (cell->count, ccc); + } } + if (cmd->n_by_vars > 1) + { + struct freq *col_cell; + struct freq *next; + HMAP_FOR_EACH_SAFE (col_cell, next, struct freq, node, &columns) + { + + value_destroy (col_cell->values, var_get_width (cmd->by_var[1])); + free (col_cell); + } + } + hmap_destroy (&columns); { struct string label; @@ -486,20 +552,20 @@ run_barchart (struct graph *cmd, struct casereader *input) ag_func[cmd->agr].description, var_get_name (cmd->dep_vars[0])); else - ds_put_cstr (&label, + ds_put_cstr (&label, ag_func[cmd->agr].description); - - chart_item_submit (barchart_create (cmd->by_var, cmd->n_by_vars, - ds_cstr (&label), - freqs, n_freqs)); + + chart_submit (barchart_create (cmd->by_var, cmd->n_by_vars, + ds_cstr (&label), false, + cells, n_cells)); ds_destroy (&label); } - for (int i = 0; i < n_freqs; ++i) - free (freqs[i]); - - free (freqs); + for (int i = 0; i < n_cells; ++i) + free (cells[i]); + + free (cells); } @@ -510,7 +576,7 @@ run_graph (struct graph *cmd, struct casereader *input) struct casereader *reader; struct casewriter *writer; - cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars,sizeof(struct exploratory_stats)); + cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars, sizeof *cmd->es); for(int v=0;vn_dep_vars;v++) { cmd->es[v].mom = moments_create (MOMENT_KURTOSIS); @@ -521,7 +587,7 @@ run_graph (struct graph *cmd, struct casereader *input) /* Always remove cases listwise. This is correct for */ /* the histogram because there is only one variable */ /* and a simple bivariate scatterplot */ - /* if ( cmd->missing_pw == false) */ + /* if (cmd->missing_pw == false) */ input = casereader_create_filter_missing (input, cmd->dep_vars, cmd->n_dep_vars, @@ -543,7 +609,7 @@ run_graph (struct graph *cmd, struct casereader *input) struct ccase *outcase = case_create (cmd->gr_proto); const double weight = dict_get_case_weight (cmd->dict,c,NULL); if (cmd->chart_type == CT_HISTOGRAM) - case_data_rw_idx (outcase, HG_IDX_WT)->f = weight; + *case_num_rw_idx (outcase, HG_IDX_WT) = weight; if (cmd->chart_type == CT_SCATTERPLOT && cmd->n_by_vars > 0) value_copy (case_data_rw_idx (outcase, SP_IDX_BY), case_data (c, cmd->by_var[0]), @@ -551,15 +617,15 @@ run_graph (struct graph *cmd, struct casereader *input) for(int v=0;vn_dep_vars;v++) { const struct variable *var = cmd->dep_vars[v]; - const double x = case_data (c, var)->f; + const double x = case_num (c, var); - if (var_is_value_missing (var, case_data (c, var), cmd->dep_excl)) + if (var_is_value_missing (var, case_data (c, var)) & cmd->dep_excl) { cmd->es[v].missing += weight; continue; } /* Magically v value fits to SP_IDX_X, SP_IDX_Y, HG_IDX_X */ - case_data_rw_idx (outcase, v)->f = x; + *case_num_rw_idx (outcase, v) = x; if (x > cmd->es[v].maximum) cmd->es[v].maximum = x; @@ -605,14 +671,14 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) struct graph graph; graph.missing_pw = false; - + graph.pool = pool_create (); graph.dep_excl = MV_ANY; graph.fctr_excl = MV_ANY; - + graph.dict = dataset_dict (ds); - + graph.dep_vars = NULL; graph.chart_type = CT_NONE; graph.scatter_type = ST_BIVARIATE; @@ -620,7 +686,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) graph.gr_proto = caseproto_create (); subcase_init_empty (&graph.ordering); - + while (lex_token (lexer) != T_ENDCMD) { lex_match (lexer, T_SLASH); @@ -637,7 +703,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) { if (!lex_force_match_id (lexer, "NORMAL")) goto error; - + if (!lex_force_match (lexer, T_RPAREN)) goto error; @@ -665,27 +731,27 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) } graph.chart_type = CT_BAR; graph.bar_type = CBT_SIMPLE; - - if (lex_match (lexer, T_LPAREN)) + + if (lex_match (lexer, T_LPAREN)) { if (lex_match_id (lexer, "SIMPLE")) { /* This is the default anyway */ } - else if (lex_match_id (lexer, "GROUPED")) + else if (lex_match_id (lexer, "GROUPED")) { - graph.bar_type = CBT_GROUPED; + graph.bar_type = CBT_GROUPED; goto error; } - else if (lex_match_id (lexer, "STACKED")) + else if (lex_match_id (lexer, "STACKED")) { - graph.bar_type = CBT_STACKED; + graph.bar_type = CBT_STACKED; lex_error (lexer, _("%s is not yet implemented."), "STACKED"); goto error; } - else if (lex_match_id (lexer, "RANGE")) + else if (lex_match_id (lexer, "RANGE")) { - graph.bar_type = CBT_RANGE; + graph.bar_type = CBT_RANGE; lex_error (lexer, _("%s is not yet implemented."), "RANGE"); goto error; } @@ -697,7 +763,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) if (!lex_force_match (lexer, T_RPAREN)) goto error; } - + if (!lex_force_match (lexer, T_EQUALS)) goto error; @@ -712,30 +778,30 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) goto error; } graph.chart_type = CT_SCATTERPLOT; - if (lex_match (lexer, T_LPAREN)) + if (lex_match (lexer, T_LPAREN)) { if (lex_match_id (lexer, "BIVARIATE")) { /* This is the default anyway */ } - else if (lex_match_id (lexer, "OVERLAY")) + else if (lex_match_id (lexer, "OVERLAY")) { lex_error (lexer, _("%s is not yet implemented."),"OVERLAY"); goto error; } - else if (lex_match_id (lexer, "MATRIX")) + else if (lex_match_id (lexer, "MATRIX")) { lex_error (lexer, _("%s is not yet implemented."),"MATRIX"); goto error; } - else if (lex_match_id (lexer, "XYZ")) + else if (lex_match_id (lexer, "XYZ")) { lex_error(lexer, _("%s is not yet implemented."),"XYZ"); goto error; } else { - lex_error_expecting (lexer, "BIVARIATE", NULL); + lex_error_expecting (lexer, "BIVARIATE"); goto error; } if (!lex_force_match (lexer, T_RPAREN)) @@ -748,7 +814,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) &graph.dep_vars, &graph.n_dep_vars, PV_NO_DUPLICATE | PV_NUMERIC)) goto error; - + if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 1) { lex_error(lexer, _("Only one variable is allowed.")); @@ -768,7 +834,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) lex_error (lexer, _("Only one variable is allowed.")); goto error; } - + if (lex_match (lexer, T_BY)) { const struct variable *v = NULL; @@ -778,6 +844,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) goto error; } graph.by_var[0] = v; + graph.n_by_vars = 1; } } else if (lex_match_id (lexer, "LINE")) @@ -841,7 +908,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "REPORT")) { - graph.fctr_excl = MV_NEVER; + graph.fctr_excl = 0; } else if (lex_match_id (lexer, "NOREPORT")) { @@ -865,20 +932,28 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) { case CT_SCATTERPLOT: /* See scatterplot.h for the setup of the case prototype */ - graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value - SP_IDX_X*/ - graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* y value - SP_IDX_Y*/ - /* The by_var contains the plot categories for the different xy plot colors */ + + /* x value - SP_IDX_X*/ + graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); + + /* y value - SP_IDX_Y*/ + graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); + /* The by_var contains the plot categories for the different xy + plot colors */ if (graph.n_by_vars > 0) /* SP_IDX_BY */ - graph.gr_proto = caseproto_add_width (graph.gr_proto, var_get_width(graph.by_var[0])); + graph.gr_proto = caseproto_add_width (graph.gr_proto, + var_get_width(graph.by_var[0])); break; case CT_HISTOGRAM: - graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value */ - graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* weight value */ + /* x value */ + graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); + /* weight value */ + graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); break; case CT_BAR: break; case CT_NONE: - lex_error_expecting (lexer, "HISTOGRAM", "SCATTERPLOT", "BAR", NULL); + lex_error_expecting (lexer, "HISTOGRAM", "SCATTERPLOT", "BAR"); goto error; default: NOT_REACHED (); @@ -889,7 +964,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) struct casegrouper *grouper; struct casereader *group; bool ok; - + grouper = casegrouper_create_splits (proc_open (ds), graph.dict); while (casegrouper_get_next_group (grouper, &group)) { @@ -902,7 +977,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) ok = proc_commit (ds) && ok; } - subcase_destroy (&graph.ordering); + subcase_uninit (&graph.ordering); free (graph.dep_vars); pool_destroy (graph.pool); caseproto_unref (graph.gr_proto); @@ -910,7 +985,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) return CMD_SUCCESS; error: - subcase_destroy (&graph.ordering); + subcase_uninit (&graph.ordering); caseproto_unref (graph.gr_proto); free (graph.dep_vars); pool_destroy (graph.pool);