X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=inline;f=src%2Flanguage%2Fstats%2Fgraph.c;h=147d654ac484d392f263c8ebbb323e7a95bd97d8;hb=40fb94bbe38b3e444df7952631ec5fe83f7f1086;hp=8af6a1515fefbdf1f2c363187d8f6fbf501b0ccb;hpb=612b51515e356bc4dd625a3fb18d0a4f827a1e2c;p=pspp
diff --git a/src/language/stats/graph.c b/src/language/stats/graph.c
index 8af6a1515f..147d654ac4 100644
--- a/src/language/stats/graph.c
+++ b/src/language/stats/graph.c
@@ -1,7 +1,7 @@
/*
PSPP - a program for statistical analysis.
- Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc.
-
+ Copyright (C) 2012, 2013, 2015, 2019 Free Software Foundation, Inc.
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
@@ -11,7 +11,7 @@
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
@@ -58,8 +58,6 @@
#include "language/stats/freq.h"
#include "language/stats/chart-category.h"
-#include "output/tab.h"
-
#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
@@ -142,10 +140,10 @@ struct graph
enum bar_type bar_type;
const struct variable *by_var[2];
size_t n_by_vars;
-
+
struct subcase ordering; /* Ordering for aggregation */
int agr; /* Index into ag_func */
-
+
/* A caseproto that contains the plot data */
struct caseproto *gr_proto;
};
@@ -207,7 +205,8 @@ const struct ag_func ag_func[] =
{"COUNT", N_("Count"), 0, 0, NULL, calc_mom0, 0, 0},
{"PCT", N_("Percentage"), 0, 0, NULL, calc_mom0, 0, post_percentage},
{"CUFREQ", N_("Cumulative Count"), 0, 1, NULL, calc_mom0, 0, 0},
- {"CUPCT", N_("Cumulative Percent"), 0, 1, NULL, calc_mom0, 0, post_percentage},
+ {"CUPCT", N_("Cumulative Percent"), 0, 1, NULL, calc_mom0, 0,
+ post_percentage},
{"MEAN", N_("Mean"), 1, 0, NULL, calc_mom1, post_normalise, 0},
{"SUM", N_("Sum"), 1, 0, NULL, calc_mom1, 0, 0},
@@ -245,6 +244,8 @@ parse_function (struct lexer *lexer, struct graph *graph)
for (v = 0; v < ag_func[i].arity; ++v)
{
graph->dep_vars[v] = parse_variable (lexer, graph->dict);
+ if (! graph->dep_vars[v])
+ goto error;
}
if (!lex_force_match (lexer, T_RPAREN))
@@ -274,7 +275,7 @@ parse_function (struct lexer *lexer, struct graph *graph)
}
return true;
-
+
error:
lex_error (lexer, NULL);
return false;
@@ -307,7 +308,8 @@ show_scatterplot (const struct graph *cmd, struct casereader *input)
scatterplot = scatterplot_create (input,
var_to_string(cmd->dep_vars[0]),
var_to_string(cmd->dep_vars[1]),
- (cmd->n_by_vars > 0) ? cmd->by_var[0] : NULL,
+ (cmd->n_by_vars > 0) ? cmd->by_var[0]
+ : NULL,
&byvar_overflow,
ds_cstr (&title),
cmd->es[0].minimum, cmd->es[0].maximum,
@@ -329,6 +331,12 @@ show_histogr (const struct graph *cmd, struct casereader *input)
struct histogram *histogram;
struct ccase *c;
+ if (cmd->es[0].cc <= 0)
+ {
+ casereader_destroy (input);
+ return;
+ }
+
{
/* Sturges Rule */
double bin_width = fabs (cmd->es[0].minimum - cmd->es[0].maximum)
@@ -360,24 +368,24 @@ show_histogr (const struct graph *cmd, struct casereader *input)
struct string label;
- ds_init_cstr (&label,
+ ds_init_cstr (&label,
var_to_string (cmd->dep_vars[0]));
moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, NULL);
- chart_item_submit
- ( histogram_chart_create (histogram->gsl_hist,
+ chart_submit
+ (histogram_chart_create (histogram->gsl_hist,
ds_cstr (&label), n, mean,
sqrt (var), cmd->normal));
- statistic_destroy (&histogram->parent);
+ statistic_destroy (&histogram->parent);
ds_destroy (&label);
}
}
static void
cleanup_exploratory_stats (struct graph *cmd)
-{
+{
int v;
for (v = 0; v < cmd->n_dep_vars; ++v)
@@ -394,7 +402,7 @@ run_barchart (struct graph *cmd, struct casereader *input)
struct casereader *group;
double ccc = 0.0;
- if ( cmd->missing_pw == false)
+ if (cmd->missing_pw == false)
input = casereader_create_filter_missing (input,
cmd->dep_vars,
cmd->n_dep_vars,
@@ -405,9 +413,11 @@ run_barchart (struct graph *cmd, struct casereader *input)
input = sort_execute (input, &cmd->ordering);
- struct freq **freqs = NULL;
- int n_freqs = 0;
+ struct freq **cells = NULL;
+ int n_cells = 0;
+ struct hmap columns = HMAP_INITIALIZER (columns);
+ assert (cmd->n_by_vars <= 2);
for (grouper = casegrouper_create_vars (input, cmd->by_var,
cmd->n_by_vars);
casegrouper_get_next_group (grouper, &group);
@@ -419,7 +429,9 @@ run_barchart (struct graph *cmd, struct casereader *input)
/* Deal with missing values in the categorical variables */
for (v = 0; v < cmd->n_by_vars; ++v)
{
- if (var_is_value_missing (cmd->by_var[v], case_data (c, cmd->by_var[v]), cmd->fctr_excl) )
+ if (var_is_value_missing (cmd->by_var[v],
+ case_data (c, cmd->by_var[v]),
+ cmd->fctr_excl))
break;
}
@@ -429,23 +441,46 @@ run_barchart (struct graph *cmd, struct casereader *input)
continue;
}
- freqs = xrealloc (freqs, sizeof (*freqs) * ++n_freqs);
- freqs[n_freqs - 1] = xzalloc (sizeof (**freqs) +
- sizeof (union value) * (cmd->n_by_vars - 1) );
+ cells = xrealloc (cells, sizeof (*cells) * ++n_cells);
+ cells[n_cells - 1] = xzalloc (sizeof (**cells)
+ + sizeof (union value)
+ * (cmd->n_by_vars - 1));
- if (ag_func[cmd->agr].cumulative && n_freqs >= 2)
- freqs[n_freqs - 1]->count = freqs[n_freqs - 2]->count;
+ if (ag_func[cmd->agr].cumulative && n_cells >= 2)
+ cells[n_cells - 1]->count = cells[n_cells - 2]->count;
else
- freqs[n_freqs - 1]->count = 0;
+ cells[n_cells - 1]->count = 0;
if (ag_func[cmd->agr].pre)
- freqs[n_freqs - 1]->count = ag_func[cmd->agr].pre();
+ cells[n_cells - 1]->count = ag_func[cmd->agr].pre();
+
+ if (cmd->n_by_vars > 1)
+ {
+ const union value *vv = case_data (c, cmd->by_var[1]);
+ const double weight = dict_get_case_weight (cmd->dict, c, NULL);
+ int v1_width = var_get_width (cmd->by_var[1]);
+ size_t hash = value_hash (vv, v1_width, 0);
+
+ struct freq *fcol = NULL;
+ HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns)
+ if (value_equal (vv, &fcol->values[0], v1_width))
+ break;
+ if (fcol)
+ fcol->count += weight;
+ else
+ {
+ fcol = xzalloc (sizeof *fcol);
+ fcol->count = weight;
+ value_clone (&fcol->values[0], vv, v1_width);
+ hmap_insert (&columns, &fcol->node, hash);
+ }
+ }
for (v = 0; v < cmd->n_by_vars; ++v)
{
- value_clone (&freqs[n_freqs - 1]->values[v], case_data (c, cmd->by_var[v]),
- var_get_width (cmd->by_var[v])
- );
+ value_clone (&cells[n_cells - 1]->values[v],
+ case_data (c, cmd->by_var[v]),
+ var_get_width (cmd->by_var[v]));
}
case_unref (c);
@@ -453,29 +488,60 @@ run_barchart (struct graph *cmd, struct casereader *input)
for (;(c = casereader_read (group)) != NULL; case_unref (c))
{
const double weight = dict_get_case_weight (cmd->dict,c,NULL);
- const double x = (cmd->n_dep_vars > 0) ? case_data (c, cmd->dep_vars[0])->f : SYSMIS;
+ const double x = (cmd->n_dep_vars > 0)
+ ? case_data (c, cmd->dep_vars[0])->f : SYSMIS;
cc += weight;
-
- freqs[n_freqs - 1]->count
- = ag_func[cmd->agr].calc (freqs[n_freqs - 1]->count, x, weight);
+
+ cells[n_cells - 1]->count
+ = ag_func[cmd->agr].calc (cells[n_cells - 1]->count, x, weight);
}
if (ag_func[cmd->agr].post)
- freqs[n_freqs - 1]->count
- = ag_func[cmd->agr].post (freqs[n_freqs - 1]->count, cc);
+ cells[n_cells - 1]->count
+ = ag_func[cmd->agr].post (cells[n_cells - 1]->count, cc);
ccc += cc;
}
casegrouper_destroy (grouper);
- for (int i = 0; i < n_freqs; ++i)
+ for (int i = 0; i < n_cells; ++i)
{
if (ag_func[cmd->agr].ppost)
- freqs[i]->count = ag_func[cmd->agr].ppost (freqs[i]->count, ccc);
+ {
+ struct freq *cell = cells[i];
+ if (cmd->n_by_vars > 1)
+ {
+ const union value *vv = &cell->values[1];
+
+ int v1_width = var_get_width (cmd->by_var[1]);
+ size_t hash = value_hash (vv, v1_width, 0);
+
+ struct freq *fcol = NULL;
+ HMAP_FOR_EACH_WITH_HASH (fcol, struct freq, node, hash, &columns)
+ if (value_equal (vv, &fcol->values[0], v1_width))
+ break;
+
+ cell->count = ag_func[cmd->agr].ppost (cell->count, fcol->count);
+ }
+ else
+ cell->count = ag_func[cmd->agr].ppost (cell->count, ccc);
+ }
}
+ if (cmd->n_by_vars > 1)
+ {
+ struct freq *col_cell;
+ struct freq *next;
+ HMAP_FOR_EACH_SAFE (col_cell, next, struct freq, node, &columns)
+ {
+
+ value_destroy (col_cell->values, var_get_width (cmd->by_var[1]));
+ free (col_cell);
+ }
+ }
+ hmap_destroy (&columns);
{
struct string label;
@@ -486,20 +552,20 @@ run_barchart (struct graph *cmd, struct casereader *input)
ag_func[cmd->agr].description,
var_get_name (cmd->dep_vars[0]));
else
- ds_put_cstr (&label,
+ ds_put_cstr (&label,
ag_func[cmd->agr].description);
-
- chart_item_submit (barchart_create (cmd->by_var, cmd->n_by_vars,
- ds_cstr (&label),
- freqs, n_freqs));
+
+ chart_submit (barchart_create (cmd->by_var, cmd->n_by_vars,
+ ds_cstr (&label), false,
+ cells, n_cells));
ds_destroy (&label);
}
- for (int i = 0; i < n_freqs; ++i)
- free (freqs[i]);
-
- free (freqs);
+ for (int i = 0; i < n_cells; ++i)
+ free (cells[i]);
+
+ free (cells);
}
@@ -510,7 +576,7 @@ run_graph (struct graph *cmd, struct casereader *input)
struct casereader *reader;
struct casewriter *writer;
- cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars,sizeof(struct exploratory_stats));
+ cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars, sizeof *cmd->es);
for(int v=0;v<cmd->n_dep_vars;v++)
{
cmd->es[v].mom = moments_create (MOMENT_KURTOSIS);
@@ -521,7 +587,7 @@ run_graph (struct graph *cmd, struct casereader *input)
/* Always remove cases listwise. This is correct for */
/* the histogram because there is only one variable */
/* and a simple bivariate scatterplot */
- /* if ( cmd->missing_pw == false) */
+ /* if (cmd->missing_pw == false) */
input = casereader_create_filter_missing (input,
cmd->dep_vars,
cmd->n_dep_vars,
@@ -605,14 +671,14 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
struct graph graph;
graph.missing_pw = false;
-
+
graph.pool = pool_create ();
graph.dep_excl = MV_ANY;
graph.fctr_excl = MV_ANY;
-
+
graph.dict = dataset_dict (ds);
-
+
graph.dep_vars = NULL;
graph.chart_type = CT_NONE;
graph.scatter_type = ST_BIVARIATE;
@@ -620,7 +686,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
graph.gr_proto = caseproto_create ();
subcase_init_empty (&graph.ordering);
-
+
while (lex_token (lexer) != T_ENDCMD)
{
lex_match (lexer, T_SLASH);
@@ -637,7 +703,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
{
if (!lex_force_match_id (lexer, "NORMAL"))
goto error;
-
+
if (!lex_force_match (lexer, T_RPAREN))
goto error;
@@ -665,27 +731,27 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
}
graph.chart_type = CT_BAR;
graph.bar_type = CBT_SIMPLE;
-
- if (lex_match (lexer, T_LPAREN))
+
+ if (lex_match (lexer, T_LPAREN))
{
if (lex_match_id (lexer, "SIMPLE"))
{
/* This is the default anyway */
}
- else if (lex_match_id (lexer, "GROUPED"))
+ else if (lex_match_id (lexer, "GROUPED"))
{
- graph.bar_type = CBT_GROUPED;
+ graph.bar_type = CBT_GROUPED;
goto error;
}
- else if (lex_match_id (lexer, "STACKED"))
+ else if (lex_match_id (lexer, "STACKED"))
{
- graph.bar_type = CBT_STACKED;
+ graph.bar_type = CBT_STACKED;
lex_error (lexer, _("%s is not yet implemented."), "STACKED");
goto error;
}
- else if (lex_match_id (lexer, "RANGE"))
+ else if (lex_match_id (lexer, "RANGE"))
{
- graph.bar_type = CBT_RANGE;
+ graph.bar_type = CBT_RANGE;
lex_error (lexer, _("%s is not yet implemented."), "RANGE");
goto error;
}
@@ -697,7 +763,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
if (!lex_force_match (lexer, T_RPAREN))
goto error;
}
-
+
if (!lex_force_match (lexer, T_EQUALS))
goto error;
@@ -712,30 +778,30 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
goto error;
}
graph.chart_type = CT_SCATTERPLOT;
- if (lex_match (lexer, T_LPAREN))
+ if (lex_match (lexer, T_LPAREN))
{
if (lex_match_id (lexer, "BIVARIATE"))
{
/* This is the default anyway */
}
- else if (lex_match_id (lexer, "OVERLAY"))
+ else if (lex_match_id (lexer, "OVERLAY"))
{
lex_error (lexer, _("%s is not yet implemented."),"OVERLAY");
goto error;
}
- else if (lex_match_id (lexer, "MATRIX"))
+ else if (lex_match_id (lexer, "MATRIX"))
{
lex_error (lexer, _("%s is not yet implemented."),"MATRIX");
goto error;
}
- else if (lex_match_id (lexer, "XYZ"))
+ else if (lex_match_id (lexer, "XYZ"))
{
lex_error(lexer, _("%s is not yet implemented."),"XYZ");
goto error;
}
else
{
- lex_error_expecting (lexer, "BIVARIATE", NULL);
+ lex_error_expecting (lexer, "BIVARIATE");
goto error;
}
if (!lex_force_match (lexer, T_RPAREN))
@@ -748,7 +814,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
&graph.dep_vars, &graph.n_dep_vars,
PV_NO_DUPLICATE | PV_NUMERIC))
goto error;
-
+
if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 1)
{
lex_error(lexer, _("Only one variable is allowed."));
@@ -768,7 +834,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
lex_error (lexer, _("Only one variable is allowed."));
goto error;
}
-
+
if (lex_match (lexer, T_BY))
{
const struct variable *v = NULL;
@@ -778,6 +844,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
goto error;
}
graph.by_var[0] = v;
+ graph.n_by_vars = 1;
}
}
else if (lex_match_id (lexer, "LINE"))
@@ -865,20 +932,28 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
{
case CT_SCATTERPLOT:
/* See scatterplot.h for the setup of the case prototype */
- graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value - SP_IDX_X*/
- graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* y value - SP_IDX_Y*/
- /* The by_var contains the plot categories for the different xy plot colors */
+
+ /* x value - SP_IDX_X*/
+ graph.gr_proto = caseproto_add_width (graph.gr_proto, 0);
+
+ /* y value - SP_IDX_Y*/
+ graph.gr_proto = caseproto_add_width (graph.gr_proto, 0);
+ /* The by_var contains the plot categories for the different xy
+ plot colors */
if (graph.n_by_vars > 0) /* SP_IDX_BY */
- graph.gr_proto = caseproto_add_width (graph.gr_proto, var_get_width(graph.by_var[0]));
+ graph.gr_proto = caseproto_add_width (graph.gr_proto,
+ var_get_width(graph.by_var[0]));
break;
case CT_HISTOGRAM:
- graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value */
- graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* weight value */
+ /* x value */
+ graph.gr_proto = caseproto_add_width (graph.gr_proto, 0);
+ /* weight value */
+ graph.gr_proto = caseproto_add_width (graph.gr_proto, 0);
break;
case CT_BAR:
break;
case CT_NONE:
- lex_error_expecting (lexer, "HISTOGRAM", "SCATTERPLOT", "BAR", NULL);
+ lex_error_expecting (lexer, "HISTOGRAM", "SCATTERPLOT", "BAR");
goto error;
default:
NOT_REACHED ();
@@ -889,7 +964,7 @@ cmd_graph (struct lexer *lexer, struct dataset *ds)
struct casegrouper *grouper;
struct casereader *group;
bool ok;
-
+
grouper = casegrouper_create_splits (proc_open (ds), graph.dict);
while (casegrouper_get_next_group (grouper, &group))
{