2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012, 2013 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * This module implements the graph command
26 #include <gsl/gsl_cdf.h>
28 #include "libpspp/assertion.h"
29 #include "libpspp/message.h"
30 #include "libpspp/pool.h"
33 #include "data/dataset.h"
34 #include "data/dictionary.h"
35 #include "data/casegrouper.h"
36 #include "data/casereader.h"
37 #include "data/casewriter.h"
38 #include "data/caseproto.h"
39 #include "data/subcase.h"
42 #include "data/format.h"
44 #include "math/chart-geometry.h"
45 #include "math/histogram.h"
46 #include "math/moments.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "output/charts/plot-hist.h"
50 #include "output/charts/scatterplot.h"
52 #include "language/command.h"
53 #include "language/lexer/lexer.h"
54 #include "language/lexer/value-parser.h"
55 #include "language/lexer/variable-parser.h"
57 #include "output/tab.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) msgid
84 struct exploratory_stats
97 /* The minimum weight */
107 const struct variable **dep_vars;
108 struct exploratory_stats *es;
110 enum mv_class dep_excl;
111 enum mv_class fctr_excl;
113 const struct dictionary *dict;
117 /* ------------ Graph ---------------- */
118 enum chart_type chart_type;
119 enum scatter_type scatter_type;
120 const struct variable *byvar;
125 show_scatterplot (const struct graph *cmd, const struct casereader *input)
128 struct scatterplot_chart *scatterplot;
129 bool byvar_overflow = false;
131 ds_init_cstr (&title, var_to_string (cmd->dep_vars[0]));
132 ds_put_cstr (&title, " vs ");
133 ds_put_cstr (&title, var_to_string (cmd->dep_vars[1]));
136 ds_put_cstr (&title, " by ");
137 ds_put_cstr (&title, var_to_string (cmd->byvar));
140 scatterplot = scatterplot_create(input,
146 cmd->es[0].minimum, cmd->es[0].maximum,
147 cmd->es[1].minimum, cmd->es[1].maximum);
148 scatterplot_chart_submit(scatterplot);
153 msg (MW, _("Maximum number of scatterplot categories reached."
154 "Your BY variable has too many distinct values."
155 "The colouring of the plot will not be correct"));
162 show_histogr (const struct graph *cmd, const struct casereader *input)
164 struct histogram *histogram;
166 struct casereader *reader;
170 double bin_width = fabs (cmd->es[0].minimum - cmd->es[0].maximum)
171 / (1 + log2 (cmd->es[0].cc))
175 histogram_create (bin_width, cmd->es[0].minimum, cmd->es[0].maximum);
179 for (reader=casereader_clone(input);(c = casereader_read (reader)) != NULL; case_unref (c))
181 const struct variable *var = cmd->dep_vars[0];
182 const double x = case_data (c, var)->f;
183 const double weight = dict_get_case_weight(cmd->dict,c,NULL);
184 moments_pass_two (cmd->es[0].mom, x, weight);
185 histogram_add (histogram, x, weight);
187 casereader_destroy(reader);
195 ds_init_cstr (&label,
196 var_to_string (cmd->dep_vars[0]));
198 moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, NULL);
201 ( histogram_chart_create (histogram->gsl_hist,
202 ds_cstr (&label), n, mean,
205 statistic_destroy(&histogram->parent);
211 cleanup_exploratory_stats (struct graph *cmd)
215 for (v = 0; v < cmd->n_dep_vars; ++v)
217 moments_destroy (cmd->es[v].mom);
223 run_graph (struct graph *cmd, struct casereader *input)
226 struct casereader *reader;
229 cmd->es = pool_calloc(cmd->pool,cmd->n_dep_vars,sizeof(struct exploratory_stats));
230 for(int v=0;v<cmd->n_dep_vars;v++)
232 cmd->es[v].mom = moments_create (MOMENT_KURTOSIS);
233 cmd->es[v].cmin = DBL_MAX;
234 cmd->es[v].maximum = -DBL_MAX;
235 cmd->es[v].minimum = DBL_MAX;
237 /* Always remove cases listwise. This is correct for */
238 /* the histogram because there is only one variable */
239 /* and a simple bivariate scatterplot */
240 /* if ( cmd->missing_pw == false) */
241 input = casereader_create_filter_missing (input,
248 for (reader = casereader_clone (input);
249 (c = casereader_read (reader)) != NULL; case_unref (c))
251 const double weight = dict_get_case_weight(cmd->dict,c,NULL);
252 for(int v=0;v<cmd->n_dep_vars;v++)
254 const struct variable *var = cmd->dep_vars[v];
255 const double x = case_data (c, var)->f;
257 if (var_is_value_missing (var, case_data (c, var), cmd->dep_excl))
259 cmd->es[v].missing += weight;
263 if (x > cmd->es[v].maximum)
264 cmd->es[v].maximum = x;
266 if (x < cmd->es[v].minimum)
267 cmd->es[v].minimum = x;
269 cmd->es[v].non_missing += weight;
271 moments_pass_one (cmd->es[v].mom, x, weight);
273 cmd->es[v].cc += weight;
275 if (cmd->es[v].cmin > weight)
276 cmd->es[v].cmin = weight;
279 casereader_destroy (reader);
281 switch (cmd->chart_type)
284 reader = casereader_clone(input);
285 show_histogr(cmd,reader);
286 casereader_destroy(reader);
289 reader = casereader_clone(input);
290 show_scatterplot(cmd,reader);
291 casereader_destroy(reader);
298 casereader_destroy(input);
300 cleanup_exploratory_stats (cmd);
305 cmd_graph (struct lexer *lexer, struct dataset *ds)
309 graph.missing_pw = false;
311 graph.pool = pool_create ();
313 graph.dep_excl = MV_ANY;
314 graph.fctr_excl = MV_ANY;
316 graph.dict = dataset_dict (ds);
319 /* ---------------- graph ------------------ */
320 graph.dep_vars = NULL;
321 graph.chart_type = CT_NONE;
322 graph.scatter_type = ST_BIVARIATE;
325 while (lex_token (lexer) != T_ENDCMD)
327 lex_match (lexer, T_SLASH);
329 if (lex_match_id(lexer, "HISTOGRAM"))
331 if (graph.chart_type != CT_NONE)
333 lex_error(lexer, _("Only one chart type is allowed."));
336 if (!lex_force_match (lexer, T_EQUALS))
338 graph.chart_type = CT_HISTOGRAM;
339 if (!parse_variables_const (lexer, graph.dict,
340 &graph.dep_vars, &graph.n_dep_vars,
341 PV_NO_DUPLICATE | PV_NUMERIC))
343 if (graph.n_dep_vars > 1)
345 lex_error(lexer, _("Only one variable allowed"));
349 else if (lex_match_id (lexer, "SCATTERPLOT"))
351 if (graph.chart_type != CT_NONE)
353 lex_error(lexer, _("Only one chart type is allowed."));
356 graph.chart_type = CT_SCATTERPLOT;
357 if (lex_match (lexer, T_LPAREN))
359 if (lex_match_id (lexer, "BIVARIATE"))
361 /* This is the default anyway */
363 else if (lex_match_id (lexer, "OVERLAY"))
365 lex_error(lexer, _("%s is not yet implemented."),"OVERLAY");
368 else if (lex_match_id (lexer, "MATRIX"))
370 lex_error(lexer, _("%s is not yet implemented."),"MATRIX");
373 else if (lex_match_id (lexer, "XYZ"))
375 lex_error(lexer, _("%s is not yet implemented."),"XYZ");
380 lex_error_expecting(lexer, "BIVARIATE", NULL);
383 if (!lex_force_match (lexer, T_RPAREN))
386 if (!lex_force_match (lexer, T_EQUALS))
389 if (!parse_variables_const (lexer, graph.dict,
390 &graph.dep_vars, &graph.n_dep_vars,
391 PV_NO_DUPLICATE | PV_NUMERIC))
394 if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 1)
396 lex_error(lexer, _("Only one variable allowed"));
400 if (!lex_force_match (lexer, T_WITH))
403 if (!parse_variables_const (lexer, graph.dict,
404 &graph.dep_vars, &graph.n_dep_vars,
405 PV_NO_DUPLICATE | PV_NUMERIC | PV_APPEND))
408 if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 2)
410 lex_error(lexer, _("Only one variable allowed"));
414 if (lex_match(lexer, T_BY))
416 const struct variable *v = NULL;
417 if (!lex_match_variable (lexer,graph.dict,&v))
419 lex_error(lexer, _("Variable expected"));
425 else if (lex_match_id (lexer, "BAR"))
427 lex_error (lexer, _("%s is not yet implemented."),"BAR");
430 else if (lex_match_id (lexer, "LINE"))
432 lex_error (lexer, _("%s is not yet implemented."),"LINE");
435 else if (lex_match_id (lexer, "PIE"))
437 lex_error (lexer, _("%s is not yet implemented."),"PIE");
440 else if (lex_match_id (lexer, "ERRORBAR"))
442 lex_error (lexer, _("%s is not yet implemented."),"ERRORBAR");
445 else if (lex_match_id (lexer, "PARETO"))
447 lex_error (lexer, _("%s is not yet implemented."),"PARETO");
450 else if (lex_match_id (lexer, "TITLE"))
452 lex_error (lexer, _("%s is not yet implemented."),"TITLE");
455 else if (lex_match_id (lexer, "SUBTITLE"))
457 lex_error (lexer, _("%s is not yet implemented."),"SUBTITLE");
460 else if (lex_match_id (lexer, "FOOTNOTE"))
462 lex_error (lexer, _("%s is not yet implemented."),"FOOTNOTE");
463 lex_error (lexer, _("FOOTNOTE is not implemented yet for GRAPH"));
466 else if (lex_match_id (lexer, "MISSING"))
468 lex_match (lexer, T_EQUALS);
470 while (lex_token (lexer) != T_ENDCMD
471 && lex_token (lexer) != T_SLASH)
473 if (lex_match_id (lexer, "LISTWISE"))
475 graph.missing_pw = false;
477 else if (lex_match_id (lexer, "VARIABLE"))
479 graph.missing_pw = true;
481 else if (lex_match_id (lexer, "EXCLUDE"))
483 graph.dep_excl = MV_ANY;
485 else if (lex_match_id (lexer, "INCLUDE"))
487 graph.dep_excl = MV_SYSTEM;
489 else if (lex_match_id (lexer, "REPORT"))
491 graph.fctr_excl = MV_NEVER;
493 else if (lex_match_id (lexer, "NOREPORT"))
495 graph.fctr_excl = MV_ANY;
499 lex_error (lexer, NULL);
506 lex_error (lexer, NULL);
511 if (graph.chart_type == CT_NONE)
513 lex_error_expecting(lexer,"HISTOGRAM","SCATTERPLOT",NULL);
519 struct casegrouper *grouper;
520 struct casereader *group;
523 grouper = casegrouper_create_splits (proc_open (ds), graph.dict);
524 while (casegrouper_get_next_group (grouper, &group))
525 run_graph (&graph, group);
526 ok = casegrouper_destroy (grouper);
527 ok = proc_commit (ds) && ok;
530 free (graph.dep_vars);
531 pool_destroy (graph.pool);
536 free (graph.dep_vars);
537 pool_destroy (graph.pool);