2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012, 2013 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * This module implements the graph command
26 #include <gsl/gsl_cdf.h>
28 #include "libpspp/assertion.h"
29 #include "libpspp/message.h"
30 #include "libpspp/pool.h"
33 #include "data/dataset.h"
34 #include "data/dictionary.h"
35 #include "data/casegrouper.h"
36 #include "data/casereader.h"
37 #include "data/casewriter.h"
38 #include "data/caseproto.h"
39 #include "data/subcase.h"
42 #include "data/format.h"
44 #include "math/chart-geometry.h"
45 #include "math/histogram.h"
46 #include "math/moments.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "output/charts/plot-hist.h"
50 #include "output/charts/scatterplot.h"
52 #include "language/command.h"
53 #include "language/lexer/lexer.h"
54 #include "language/lexer/value-parser.h"
55 #include "language/lexer/variable-parser.h"
57 #include "output/tab.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) msgid
84 struct exploratory_stats
97 /* The minimum weight */
107 const struct variable **dep_vars;
108 struct exploratory_stats *es;
110 enum mv_class dep_excl;
111 enum mv_class fctr_excl;
113 const struct dictionary *dict;
117 /* ------------ Graph ---------------- */
118 enum chart_type chart_type;
119 enum scatter_type scatter_type;
120 const struct variable *byvar;
125 show_scatterplot (const struct graph *cmd, const struct casereader *input)
128 struct scatterplot_chart *scatterplot;
129 bool byvar_overflow = false;
131 ds_init_empty (&title);
135 ds_put_format (&title, _("%s vs. %s by %s"),
136 var_to_string (cmd->dep_vars[0]),
137 var_to_string (cmd->dep_vars[1]),
138 var_to_string (cmd->byvar));
142 ds_put_format (&title, _("%s vs. %s"),
143 var_to_string (cmd->dep_vars[0]),
144 var_to_string (cmd->dep_vars[1]));
148 scatterplot = scatterplot_create(input,
154 cmd->es[0].minimum, cmd->es[0].maximum,
155 cmd->es[1].minimum, cmd->es[1].maximum);
156 scatterplot_chart_submit(scatterplot);
161 msg (MW, _("Maximum number of scatterplot categories reached."
162 "Your BY variable has too many distinct values."
163 "The colouring of the plot will not be correct"));
170 show_histogr (const struct graph *cmd, const struct casereader *input)
172 struct histogram *histogram;
174 struct casereader *reader;
178 double bin_width = fabs (cmd->es[0].minimum - cmd->es[0].maximum)
179 / (1 + log2 (cmd->es[0].cc))
183 histogram_create (bin_width, cmd->es[0].minimum, cmd->es[0].maximum);
187 for (reader=casereader_clone(input);(c = casereader_read (reader)) != NULL; case_unref (c))
189 const struct variable *var = cmd->dep_vars[0];
190 const double x = case_data (c, var)->f;
191 const double weight = dict_get_case_weight(cmd->dict,c,NULL);
192 moments_pass_two (cmd->es[0].mom, x, weight);
193 histogram_add (histogram, x, weight);
195 casereader_destroy(reader);
203 ds_init_cstr (&label,
204 var_to_string (cmd->dep_vars[0]));
206 moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, NULL);
209 ( histogram_chart_create (histogram->gsl_hist,
210 ds_cstr (&label), n, mean,
213 statistic_destroy(&histogram->parent);
219 cleanup_exploratory_stats (struct graph *cmd)
223 for (v = 0; v < cmd->n_dep_vars; ++v)
225 moments_destroy (cmd->es[v].mom);
231 run_graph (struct graph *cmd, struct casereader *input)
234 struct casereader *reader;
237 cmd->es = pool_calloc(cmd->pool,cmd->n_dep_vars,sizeof(struct exploratory_stats));
238 for(int v=0;v<cmd->n_dep_vars;v++)
240 cmd->es[v].mom = moments_create (MOMENT_KURTOSIS);
241 cmd->es[v].cmin = DBL_MAX;
242 cmd->es[v].maximum = -DBL_MAX;
243 cmd->es[v].minimum = DBL_MAX;
245 /* Always remove cases listwise. This is correct for */
246 /* the histogram because there is only one variable */
247 /* and a simple bivariate scatterplot */
248 /* if ( cmd->missing_pw == false) */
249 input = casereader_create_filter_missing (input,
256 for (reader = casereader_clone (input);
257 (c = casereader_read (reader)) != NULL; case_unref (c))
259 const double weight = dict_get_case_weight(cmd->dict,c,NULL);
260 for(int v=0;v<cmd->n_dep_vars;v++)
262 const struct variable *var = cmd->dep_vars[v];
263 const double x = case_data (c, var)->f;
265 if (var_is_value_missing (var, case_data (c, var), cmd->dep_excl))
267 cmd->es[v].missing += weight;
271 if (x > cmd->es[v].maximum)
272 cmd->es[v].maximum = x;
274 if (x < cmd->es[v].minimum)
275 cmd->es[v].minimum = x;
277 cmd->es[v].non_missing += weight;
279 moments_pass_one (cmd->es[v].mom, x, weight);
281 cmd->es[v].cc += weight;
283 if (cmd->es[v].cmin > weight)
284 cmd->es[v].cmin = weight;
287 casereader_destroy (reader);
289 switch (cmd->chart_type)
292 reader = casereader_clone(input);
293 show_histogr(cmd,reader);
294 casereader_destroy(reader);
297 reader = casereader_clone(input);
298 show_scatterplot(cmd,reader);
299 casereader_destroy(reader);
306 casereader_destroy(input);
308 cleanup_exploratory_stats (cmd);
313 cmd_graph (struct lexer *lexer, struct dataset *ds)
317 graph.missing_pw = false;
319 graph.pool = pool_create ();
321 graph.dep_excl = MV_ANY;
322 graph.fctr_excl = MV_ANY;
324 graph.dict = dataset_dict (ds);
327 /* ---------------- graph ------------------ */
328 graph.dep_vars = NULL;
329 graph.chart_type = CT_NONE;
330 graph.scatter_type = ST_BIVARIATE;
333 while (lex_token (lexer) != T_ENDCMD)
335 lex_match (lexer, T_SLASH);
337 if (lex_match_id(lexer, "HISTOGRAM"))
339 if (graph.chart_type != CT_NONE)
341 lex_error(lexer, _("Only one chart type is allowed."));
344 if (!lex_force_match (lexer, T_EQUALS))
346 graph.chart_type = CT_HISTOGRAM;
347 if (!parse_variables_const (lexer, graph.dict,
348 &graph.dep_vars, &graph.n_dep_vars,
349 PV_NO_DUPLICATE | PV_NUMERIC))
351 if (graph.n_dep_vars > 1)
353 lex_error(lexer, _("Only one variable is allowed"));
357 else if (lex_match_id (lexer, "SCATTERPLOT"))
359 if (graph.chart_type != CT_NONE)
361 lex_error(lexer, _("Only one chart type is allowed."));
364 graph.chart_type = CT_SCATTERPLOT;
365 if (lex_match (lexer, T_LPAREN))
367 if (lex_match_id (lexer, "BIVARIATE"))
369 /* This is the default anyway */
371 else if (lex_match_id (lexer, "OVERLAY"))
373 lex_error(lexer, _("%s is not yet implemented."),"OVERLAY");
376 else if (lex_match_id (lexer, "MATRIX"))
378 lex_error(lexer, _("%s is not yet implemented."),"MATRIX");
381 else if (lex_match_id (lexer, "XYZ"))
383 lex_error(lexer, _("%s is not yet implemented."),"XYZ");
388 lex_error_expecting(lexer, "BIVARIATE", NULL);
391 if (!lex_force_match (lexer, T_RPAREN))
394 if (!lex_force_match (lexer, T_EQUALS))
397 if (!parse_variables_const (lexer, graph.dict,
398 &graph.dep_vars, &graph.n_dep_vars,
399 PV_NO_DUPLICATE | PV_NUMERIC))
402 if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 1)
404 lex_error(lexer, _("Only one variable allowed"));
408 if (!lex_force_match (lexer, T_WITH))
411 if (!parse_variables_const (lexer, graph.dict,
412 &graph.dep_vars, &graph.n_dep_vars,
413 PV_NO_DUPLICATE | PV_NUMERIC | PV_APPEND))
416 if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 2)
418 lex_error(lexer, _("Only one variable allowed"));
422 if (lex_match(lexer, T_BY))
424 const struct variable *v = NULL;
425 if (!lex_match_variable (lexer,graph.dict,&v))
427 lex_error(lexer, _("Variable expected"));
433 else if (lex_match_id (lexer, "BAR"))
435 lex_error (lexer, _("%s is not yet implemented."),"BAR");
438 else if (lex_match_id (lexer, "LINE"))
440 lex_error (lexer, _("%s is not yet implemented."),"LINE");
443 else if (lex_match_id (lexer, "PIE"))
445 lex_error (lexer, _("%s is not yet implemented."),"PIE");
448 else if (lex_match_id (lexer, "ERRORBAR"))
450 lex_error (lexer, _("%s is not yet implemented."),"ERRORBAR");
453 else if (lex_match_id (lexer, "PARETO"))
455 lex_error (lexer, _("%s is not yet implemented."),"PARETO");
458 else if (lex_match_id (lexer, "TITLE"))
460 lex_error (lexer, _("%s is not yet implemented."),"TITLE");
463 else if (lex_match_id (lexer, "SUBTITLE"))
465 lex_error (lexer, _("%s is not yet implemented."),"SUBTITLE");
468 else if (lex_match_id (lexer, "FOOTNOTE"))
470 lex_error (lexer, _("%s is not yet implemented."),"FOOTNOTE");
471 lex_error (lexer, _("FOOTNOTE is not implemented yet for GRAPH"));
474 else if (lex_match_id (lexer, "MISSING"))
476 lex_match (lexer, T_EQUALS);
478 while (lex_token (lexer) != T_ENDCMD
479 && lex_token (lexer) != T_SLASH)
481 if (lex_match_id (lexer, "LISTWISE"))
483 graph.missing_pw = false;
485 else if (lex_match_id (lexer, "VARIABLE"))
487 graph.missing_pw = true;
489 else if (lex_match_id (lexer, "EXCLUDE"))
491 graph.dep_excl = MV_ANY;
493 else if (lex_match_id (lexer, "INCLUDE"))
495 graph.dep_excl = MV_SYSTEM;
497 else if (lex_match_id (lexer, "REPORT"))
499 graph.fctr_excl = MV_NEVER;
501 else if (lex_match_id (lexer, "NOREPORT"))
503 graph.fctr_excl = MV_ANY;
507 lex_error (lexer, NULL);
514 lex_error (lexer, NULL);
519 if (graph.chart_type == CT_NONE)
521 lex_error_expecting(lexer,"HISTOGRAM","SCATTERPLOT",NULL);
527 struct casegrouper *grouper;
528 struct casereader *group;
531 grouper = casegrouper_create_splits (proc_open (ds), graph.dict);
532 while (casegrouper_get_next_group (grouper, &group))
533 run_graph (&graph, group);
534 ok = casegrouper_destroy (grouper);
535 ok = proc_commit (ds) && ok;
538 free (graph.dep_vars);
539 pool_destroy (graph.pool);
544 free (graph.dep_vars);
545 pool_destroy (graph.pool);