X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fgraph.c;h=8157bb295887c762f2362005b6e8f4bfcb474dcf;hb=ffca729efecaa224bf1d71ba4b43af9222d7e8e3;hp=578efc1e756feec5755582798b380e792813c05b;hpb=6e097c89af440da90b43ce90864394c4d0c843d5;p=pspp diff --git a/src/language/stats/graph.c b/src/language/stats/graph.c index 578efc1e75..8157bb2958 100644 --- a/src/language/stats/graph.c +++ b/src/language/stats/graph.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc. + Copyright (C) 2012, 2013, 2015, 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -58,8 +58,6 @@ #include "language/stats/freq.h" #include "language/stats/chart-category.h" -#include "output/tab.h" - #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid @@ -207,7 +205,8 @@ const struct ag_func ag_func[] = {"COUNT", N_("Count"), 0, 0, NULL, calc_mom0, 0, 0}, {"PCT", N_("Percentage"), 0, 0, NULL, calc_mom0, 0, post_percentage}, {"CUFREQ", N_("Cumulative Count"), 0, 1, NULL, calc_mom0, 0, 0}, - {"CUPCT", N_("Cumulative Percent"), 0, 1, NULL, calc_mom0, 0, post_percentage}, + {"CUPCT", N_("Cumulative Percent"), 0, 1, NULL, calc_mom0, 0, + post_percentage}, {"MEAN", N_("Mean"), 1, 0, NULL, calc_mom1, post_normalise, 0}, {"SUM", N_("Sum"), 1, 0, NULL, calc_mom1, 0, 0}, @@ -241,7 +240,7 @@ parse_function (struct lexer *lexer, struct graph *graph) if (!lex_force_match (lexer, T_LPAREN)) goto error; - graph->dep_vars = xzalloc (sizeof (graph->dep_vars) * graph->n_dep_vars); + graph->dep_vars = xcalloc (graph->n_dep_vars, sizeof (graph->dep_vars)); for (v = 0; v < ag_func[i].arity; ++v) { graph->dep_vars[v] = parse_variable (lexer, graph->dict); @@ -309,7 +308,8 @@ show_scatterplot (const struct graph *cmd, struct casereader *input) scatterplot = 
scatterplot_create (input, var_to_string(cmd->dep_vars[0]), var_to_string(cmd->dep_vars[1]), - (cmd->n_by_vars > 0) ? cmd->by_var[0] : NULL, + (cmd->n_by_vars > 0) ? cmd->by_var[0] + : NULL, &byvar_overflow, ds_cstr (&title), cmd->es[0].minimum, cmd->es[0].maximum, @@ -355,8 +355,8 @@ show_histogr (const struct graph *cmd, struct casereader *input) for (;(c = casereader_read (input)) != NULL; case_unref (c)) { - const double x = case_data_idx (c, HG_IDX_X)->f; - const double weight = case_data_idx (c, HG_IDX_WT)->f; + const double x = case_num_idx (c, HG_IDX_X); + const double weight = case_num_idx (c, HG_IDX_WT); moments_pass_two (cmd->es[0].mom, x, weight); histogram_add (histogram, x, weight); } @@ -373,8 +373,8 @@ show_histogr (const struct graph *cmd, struct casereader *input) moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, NULL); - chart_item_submit - ( histogram_chart_create (histogram->gsl_hist, + chart_submit + (histogram_chart_create (histogram->gsl_hist, ds_cstr (&label), n, mean, sqrt (var), cmd->normal)); @@ -402,7 +402,7 @@ run_barchart (struct graph *cmd, struct casereader *input) struct casereader *group; double ccc = 0.0; - if ( cmd->missing_pw == false) + if (cmd->missing_pw == false) input = casereader_create_filter_missing (input, cmd->dep_vars, cmd->n_dep_vars, @@ -413,9 +413,11 @@ run_barchart (struct graph *cmd, struct casereader *input) input = sort_execute (input, &cmd->ordering); - struct freq **freqs = NULL; - int n_freqs = 0; + struct freq **cells = NULL; + int n_cells = 0; + struct hmap columns = HMAP_INITIALIZER (columns); + assert (cmd->n_by_vars <= 2); for (grouper = casegrouper_create_vars (input, cmd->by_var, cmd->n_by_vars); casegrouper_get_next_group (grouper, &group); @@ -427,7 +429,9 @@ run_barchart (struct graph *cmd, struct casereader *input) /* Deal with missing values in the categorical variables */ for (v = 0; v < cmd->n_by_vars; ++v) { - if (var_is_value_missing (cmd->by_var[v], case_data (c, cmd->by_var[v]), 
cmd->fctr_excl) ) + if (var_is_value_missing (cmd->by_var[v], + case_data (c, cmd->by_var[v])) + & cmd->fctr_excl) break; } @@ -437,23 +441,46 @@ run_barchart (struct graph *cmd, struct casereader *input) continue; } - freqs = xrealloc (freqs, sizeof (*freqs) * ++n_freqs); - freqs[n_freqs - 1] = xzalloc (sizeof (**freqs) + - sizeof (union value) * (cmd->n_by_vars - 1) ); + cells = xrealloc (cells, sizeof (*cells) * ++n_cells); + cells[n_cells - 1] = xzalloc (sizeof (**cells) + + sizeof (union value) + * (cmd->n_by_vars - 1)); - if (ag_func[cmd->agr].cumulative && n_freqs >= 2) - freqs[n_freqs - 1]->count = freqs[n_freqs - 2]->count; + if (ag_func[cmd->agr].cumulative && n_cells >= 2) + cells[n_cells - 1]->count = cells[n_cells - 2]->count; else - freqs[n_freqs - 1]->count = 0; + cells[n_cells - 1]->count = 0; if (ag_func[cmd->agr].pre) - freqs[n_freqs - 1]->count = ag_func[cmd->agr].pre(); + cells[n_cells - 1]->count = ag_func[cmd->agr].pre(); + if (cmd->n_by_vars > 1) + { + const union value *vv = case_data (c, cmd->by_var[1]); + const double weight = dict_get_case_weight (cmd->dict, c, NULL); + int v1_width = var_get_width (cmd->by_var[1]); + size_t hash = value_hash (vv, v1_width, 0); + + struct freq *fcol = NULL; + HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns) + if (value_equal (vv, &fcol->values[0], v1_width)) + break; + + if (fcol) + fcol->count += weight; + else + { + fcol = xzalloc (sizeof *fcol); + fcol->count = weight; + value_clone (&fcol->values[0], vv, v1_width); + hmap_insert (&columns, &fcol->node, hash); + } + } for (v = 0; v < cmd->n_by_vars; ++v) { - value_clone (&freqs[n_freqs - 1]->values[v], case_data (c, cmd->by_var[v]), - var_get_width (cmd->by_var[v]) - ); + value_clone (&cells[n_cells - 1]->values[v], + case_data (c, cmd->by_var[v]), + var_get_width (cmd->by_var[v])); } case_unref (c); @@ -461,29 +488,60 @@ run_barchart (struct graph *cmd, struct casereader *input) for (;(c = casereader_read (group)) != NULL; case_unref 
(c)) { const double weight = dict_get_case_weight (cmd->dict,c,NULL); - const double x = (cmd->n_dep_vars > 0) ? case_data (c, cmd->dep_vars[0])->f : SYSMIS; + const double x = (cmd->n_dep_vars > 0) + ? case_num (c, cmd->dep_vars[0]) : SYSMIS; cc += weight; - freqs[n_freqs - 1]->count - = ag_func[cmd->agr].calc (freqs[n_freqs - 1]->count, x, weight); + cells[n_cells - 1]->count + = ag_func[cmd->agr].calc (cells[n_cells - 1]->count, x, weight); } if (ag_func[cmd->agr].post) - freqs[n_freqs - 1]->count - = ag_func[cmd->agr].post (freqs[n_freqs - 1]->count, cc); + cells[n_cells - 1]->count + = ag_func[cmd->agr].post (cells[n_cells - 1]->count, cc); ccc += cc; } casegrouper_destroy (grouper); - for (int i = 0; i < n_freqs; ++i) + for (int i = 0; i < n_cells; ++i) { if (ag_func[cmd->agr].ppost) - freqs[i]->count = ag_func[cmd->agr].ppost (freqs[i]->count, ccc); + { + struct freq *cell = cells[i]; + if (cmd->n_by_vars > 1) + { + const union value *vv = &cell->values[1]; + + int v1_width = var_get_width (cmd->by_var[1]); + size_t hash = value_hash (vv, v1_width, 0); + + struct freq *fcol = NULL; + HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns) + if (value_equal (vv, &fcol->values[0], v1_width)) + break; + + cell->count = ag_func[cmd->agr].ppost (cell->count, fcol->count); + } + else + cell->count = ag_func[cmd->agr].ppost (cell->count, ccc); + } } + if (cmd->n_by_vars > 1) + { + struct freq *col_cell; + struct freq *next; + HMAP_FOR_EACH_SAFE (col_cell, next, struct freq, node, &columns) + { + + value_destroy (col_cell->values, var_get_width (cmd->by_var[1])); + free (col_cell); + } + } + hmap_destroy (&columns); { struct string label; @@ -497,17 +555,17 @@ run_barchart (struct graph *cmd, struct casereader *input) ds_put_cstr (&label, ag_func[cmd->agr].description); - chart_item_submit (barchart_create (cmd->by_var, cmd->n_by_vars, - ds_cstr (&label), false, - freqs, n_freqs)); + chart_submit (barchart_create (cmd->by_var, cmd->n_by_vars, + ds_cstr 
(&label), false, + cells, n_cells)); ds_destroy (&label); } - for (int i = 0; i < n_freqs; ++i) - free (freqs[i]); + for (int i = 0; i < n_cells; ++i) + free (cells[i]); - free (freqs); + free (cells); } @@ -518,7 +576,7 @@ run_graph (struct graph *cmd, struct casereader *input) struct casereader *reader; struct casewriter *writer; - cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars,sizeof(struct exploratory_stats)); + cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars, sizeof *cmd->es); for(int v=0;v<cmd->n_dep_vars;v++) { cmd->es[v].mom = moments_create (MOMENT_KURTOSIS); @@ -529,7 +587,7 @@ run_graph (struct graph *cmd, struct casereader *input) /* Always remove cases listwise. This is correct for */ /* the histogram because there is only one variable */ /* and a simple bivariate scatterplot */ - /* if ( cmd->missing_pw == false) */ + /* if (cmd->missing_pw == false) */ input = casereader_create_filter_missing (input, cmd->dep_vars, cmd->n_dep_vars, @@ -551,7 +609,7 @@ run_graph (struct graph *cmd, struct casereader *input) struct ccase *outcase = case_create (cmd->gr_proto); const double weight = dict_get_case_weight (cmd->dict,c,NULL); if (cmd->chart_type == CT_HISTOGRAM) - case_data_rw_idx (outcase, HG_IDX_WT)->f = weight; + *case_num_rw_idx (outcase, HG_IDX_WT) = weight; if (cmd->chart_type == CT_SCATTERPLOT && cmd->n_by_vars > 0) value_copy (case_data_rw_idx (outcase, SP_IDX_BY), case_data (c, cmd->by_var[0]), @@ -559,15 +617,15 @@ run_graph (struct graph *cmd, struct casereader *input) for(int v=0;v<cmd->n_dep_vars;v++) { const struct variable *var = cmd->dep_vars[v]; - const double x = case_data (c, var)->f; + const double x = case_num (c, var); - if (var_is_value_missing (var, case_data (c, var), cmd->dep_excl)) + if (var_is_value_missing (var, case_data (c, var)) & cmd->dep_excl) { cmd->es[v].missing += weight; continue; } /* Magically v value fits to SP_IDX_X, SP_IDX_Y, HG_IDX_X */ - case_data_rw_idx (outcase, v)->f = x; + *case_num_rw_idx (outcase, v) = x; if (x > 
cmd->es[v].maximum) cmd->es[v].maximum = x; @@ -743,7 +801,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) } else { - lex_error_expecting (lexer, "BIVARIATE", NULL); + lex_error_expecting (lexer, "BIVARIATE"); goto error; } if (!lex_force_match (lexer, T_RPAREN)) @@ -850,7 +908,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "REPORT")) { - graph.fctr_excl = MV_NEVER; + graph.fctr_excl = 0; } else if (lex_match_id (lexer, "NOREPORT")) { @@ -874,20 +932,28 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) { case CT_SCATTERPLOT: /* See scatterplot.h for the setup of the case prototype */ - graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value - SP_IDX_X*/ - graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* y value - SP_IDX_Y*/ - /* The by_var contains the plot categories for the different xy plot colors */ + + /* x value - SP_IDX_X*/ + graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); + + /* y value - SP_IDX_Y*/ + graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); + /* The by_var contains the plot categories for the different xy + plot colors */ if (graph.n_by_vars > 0) /* SP_IDX_BY */ - graph.gr_proto = caseproto_add_width (graph.gr_proto, var_get_width(graph.by_var[0])); + graph.gr_proto = caseproto_add_width (graph.gr_proto, + var_get_width(graph.by_var[0])); break; case CT_HISTOGRAM: - graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value */ - graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* weight value */ + /* x value */ + graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); + /* weight value */ + graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); break; case CT_BAR: break; case CT_NONE: - lex_error_expecting (lexer, "HISTOGRAM", "SCATTERPLOT", "BAR", NULL); + lex_error_expecting (lexer, "HISTOGRAM", "SCATTERPLOT", "BAR"); goto error; default: NOT_REACHED (); @@ -911,7 +977,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) 
ok = proc_commit (ds) && ok; } - subcase_destroy (&graph.ordering); + subcase_uninit (&graph.ordering); free (graph.dep_vars); pool_destroy (graph.pool); caseproto_unref (graph.gr_proto); @@ -919,7 +985,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds) return CMD_SUCCESS; error: - subcase_destroy (&graph.ordering); + subcase_uninit (&graph.ordering); caseproto_unref (graph.gr_proto); free (graph.dep_vars); pool_destroy (graph.pool);