2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * This module implements the graph command
26 #include <gsl/gsl_cdf.h>
28 #include "libpspp/assertion.h"
29 #include "libpspp/message.h"
30 #include "libpspp/pool.h"
33 #include "data/dataset.h"
34 #include "data/dictionary.h"
35 #include "data/casegrouper.h"
36 #include "data/casereader.h"
37 #include "data/casewriter.h"
38 #include "data/caseproto.h"
39 #include "data/subcase.h"
42 #include "data/format.h"
44 #include "math/chart-geometry.h"
45 #include "math/histogram.h"
46 #include "math/moments.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "output/charts/plot-hist.h"
50 #include "output/charts/scatterplot.h"
52 #include "language/command.h"
53 #include "language/lexer/lexer.h"
54 #include "language/lexer/value-parser.h"
55 #include "language/lexer/variable-parser.h"
57 #include "output/tab.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) msgid
84 /* Variable index for histogram case */
91 struct exploratory_stats
104 /* The minimum weight */
114 const struct variable **dep_vars;
115 struct exploratory_stats *es;
117 enum mv_class dep_excl;
118 enum mv_class fctr_excl;
120 const struct dictionary *dict;
124 /* ------------ Graph ---------------- */
125 enum chart_type chart_type;
126 enum scatter_type scatter_type;
127 const struct variable *byvar;
128 /* A caseproto that contains the plot data */
129 struct caseproto *gr_proto;
134 show_scatterplot (const struct graph *cmd, const struct casereader *input)
137 struct scatterplot_chart *scatterplot;
138 bool byvar_overflow = false;
140 ds_init_empty (&title);
144 ds_put_format (&title, _("%s vs. %s by %s"),
145 var_to_string (cmd->dep_vars[0]),
146 var_to_string (cmd->dep_vars[1]),
147 var_to_string (cmd->byvar));
151 ds_put_format (&title, _("%s vs. %s"),
152 var_to_string (cmd->dep_vars[0]),
153 var_to_string (cmd->dep_vars[1]));
157 scatterplot = scatterplot_create (input,
158 var_to_string(cmd->dep_vars[0]),
159 var_to_string(cmd->dep_vars[1]),
163 cmd->es[0].minimum, cmd->es[0].maximum,
164 cmd->es[1].minimum, cmd->es[1].maximum);
165 scatterplot_chart_submit (scatterplot);
170 msg (MW, _("Maximum number of scatterplot categories reached."
171 "Your BY variable has too many distinct values."
172 "The coloring of the plot will not be correct"));
177 show_histogr (const struct graph *cmd, const struct casereader *input)
179 struct histogram *histogram;
181 struct casereader *reader;
185 double bin_width = fabs (cmd->es[0].minimum - cmd->es[0].maximum)
186 / (1 + log2 (cmd->es[0].cc))
190 histogram_create (bin_width, cmd->es[0].minimum, cmd->es[0].maximum);
194 for (;(c = casereader_read (input)) != NULL; case_unref (c))
196 const double x = case_data_idx (c, HG_IDX_X)->f;
197 const double weight = case_data_idx (c, HG_IDX_WT)->f;
198 moments_pass_two (cmd->es[0].mom, x, weight);
199 histogram_add (histogram, x, weight);
201 casereader_destroy (input);
209 ds_init_cstr (&label,
210 var_to_string (cmd->dep_vars[0]));
212 moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, NULL);
215 ( histogram_chart_create (histogram->gsl_hist,
216 ds_cstr (&label), n, mean,
219 statistic_destroy (&histogram->parent);
225 cleanup_exploratory_stats (struct graph *cmd)
229 for (v = 0; v < cmd->n_dep_vars; ++v)
231 moments_destroy (cmd->es[v].mom);
237 run_graph (struct graph *cmd, struct casereader *input)
240 struct casereader *reader, *writer;
242 cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars,sizeof(struct exploratory_stats));
243 for(int v=0;v<cmd->n_dep_vars;v++)
245 cmd->es[v].mom = moments_create (MOMENT_KURTOSIS);
246 cmd->es[v].cmin = DBL_MAX;
247 cmd->es[v].maximum = -DBL_MAX;
248 cmd->es[v].minimum = DBL_MAX;
250 /* Always remove cases listwise. This is correct for */
251 /* the histogram because there is only one variable */
252 /* and a simple bivariate scatterplot */
253 /* if ( cmd->missing_pw == false) */
254 input = casereader_create_filter_missing (input,
261 writer = autopaging_writer_create (cmd->gr_proto);
263 /* The case data is copied to a new writer */
264 /* The setup of the case depends on the Charttype */
265 /* For Scatterplot x is assumed in dep_vars[0] */
266 /* y is assumed in dep_vars[1] */
267 /* For Histogram x is assumed in dep_vars[0] */
268 assert(SP_IDX_X == 0 && SP_IDX_Y == 1 && HG_IDX_X == 0);
270 for (;(c = casereader_read (input)) != NULL; case_unref (c))
272 struct ccase *outcase = case_create (cmd->gr_proto);
273 const double weight = dict_get_case_weight (cmd->dict,c,NULL);
274 if (cmd->chart_type == CT_HISTOGRAM)
275 case_data_rw_idx (outcase, HG_IDX_WT)->f = weight;
276 if (cmd->chart_type == CT_SCATTERPLOT && cmd->byvar)
277 value_copy (case_data_rw_idx (outcase, SP_IDX_BY),
278 case_data (c, cmd->byvar),
279 var_get_width (cmd->byvar));
280 for(int v=0;v<cmd->n_dep_vars;v++)
282 const struct variable *var = cmd->dep_vars[v];
283 const double x = case_data (c, var)->f;
285 if (var_is_value_missing (var, case_data (c, var), cmd->dep_excl))
287 cmd->es[v].missing += weight;
290 /* Magically v value fits to SP_IDX_X, SP_IDX_Y, HG_IDX_X */
291 case_data_rw_idx (outcase, v)->f = x;
293 if (x > cmd->es[v].maximum)
294 cmd->es[v].maximum = x;
296 if (x < cmd->es[v].minimum)
297 cmd->es[v].minimum = x;
299 cmd->es[v].non_missing += weight;
301 moments_pass_one (cmd->es[v].mom, x, weight);
303 cmd->es[v].cc += weight;
305 if (cmd->es[v].cmin > weight)
306 cmd->es[v].cmin = weight;
308 casewriter_write (writer,outcase);
311 reader = casewriter_make_reader (writer);
313 switch (cmd->chart_type)
316 show_histogr (cmd,reader);
319 show_scatterplot (cmd,reader);
326 casereader_destroy (input);
327 cleanup_exploratory_stats (cmd);
332 cmd_graph (struct lexer *lexer, struct dataset *ds)
336 graph.missing_pw = false;
338 graph.pool = pool_create ();
340 graph.dep_excl = MV_ANY;
341 graph.fctr_excl = MV_ANY;
343 graph.dict = dataset_dict (ds);
346 /* ---------------- graph ------------------ */
347 graph.dep_vars = NULL;
348 graph.chart_type = CT_NONE;
349 graph.scatter_type = ST_BIVARIATE;
351 graph.gr_proto = caseproto_create ();
353 while (lex_token (lexer) != T_ENDCMD)
355 lex_match (lexer, T_SLASH);
357 if (lex_match_id (lexer, "HISTOGRAM"))
359 if (graph.chart_type != CT_NONE)
361 lex_error (lexer, _("Only one chart type is allowed."));
364 if (!lex_force_match (lexer, T_EQUALS))
366 graph.chart_type = CT_HISTOGRAM;
367 if (!parse_variables_const (lexer, graph.dict,
368 &graph.dep_vars, &graph.n_dep_vars,
369 PV_NO_DUPLICATE | PV_NUMERIC))
371 if (graph.n_dep_vars > 1)
373 lex_error (lexer, _("Only one variable is allowed."));
377 else if (lex_match_id (lexer, "SCATTERPLOT"))
379 if (graph.chart_type != CT_NONE)
381 lex_error (lexer, _("Only one chart type is allowed."));
384 graph.chart_type = CT_SCATTERPLOT;
385 if (lex_match (lexer, T_LPAREN))
387 if (lex_match_id (lexer, "BIVARIATE"))
389 /* This is the default anyway */
391 else if (lex_match_id (lexer, "OVERLAY"))
393 lex_error (lexer, _("%s is not yet implemented."),"OVERLAY");
396 else if (lex_match_id (lexer, "MATRIX"))
398 lex_error (lexer, _("%s is not yet implemented."),"MATRIX");
401 else if (lex_match_id (lexer, "XYZ"))
403 lex_error(lexer, _("%s is not yet implemented."),"XYZ");
408 lex_error_expecting (lexer, "BIVARIATE", NULL);
411 if (!lex_force_match (lexer, T_RPAREN))
414 if (!lex_force_match (lexer, T_EQUALS))
417 if (!parse_variables_const (lexer, graph.dict,
418 &graph.dep_vars, &graph.n_dep_vars,
419 PV_NO_DUPLICATE | PV_NUMERIC))
422 if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 1)
424 lex_error(lexer, _("Only one variable is allowed."));
428 if (!lex_force_match (lexer, T_WITH))
431 if (!parse_variables_const (lexer, graph.dict,
432 &graph.dep_vars, &graph.n_dep_vars,
433 PV_NO_DUPLICATE | PV_NUMERIC | PV_APPEND))
436 if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 2)
438 lex_error (lexer, _("Only one variable is allowed."));
442 if (lex_match (lexer, T_BY))
444 const struct variable *v = NULL;
445 if (!lex_match_variable (lexer,graph.dict,&v))
447 lex_error (lexer, _("Variable expected"));
453 else if (lex_match_id (lexer, "BAR"))
455 lex_error (lexer, _("%s is not yet implemented."),"BAR");
458 else if (lex_match_id (lexer, "LINE"))
460 lex_error (lexer, _("%s is not yet implemented."),"LINE");
463 else if (lex_match_id (lexer, "PIE"))
465 lex_error (lexer, _("%s is not yet implemented."),"PIE");
468 else if (lex_match_id (lexer, "ERRORBAR"))
470 lex_error (lexer, _("%s is not yet implemented."),"ERRORBAR");
473 else if (lex_match_id (lexer, "PARETO"))
475 lex_error (lexer, _("%s is not yet implemented."),"PARETO");
478 else if (lex_match_id (lexer, "TITLE"))
480 lex_error (lexer, _("%s is not yet implemented."),"TITLE");
483 else if (lex_match_id (lexer, "SUBTITLE"))
485 lex_error (lexer, _("%s is not yet implemented."),"SUBTITLE");
488 else if (lex_match_id (lexer, "FOOTNOTE"))
490 lex_error (lexer, _("%s is not yet implemented."),"FOOTNOTE");
491 lex_error (lexer, _("FOOTNOTE is not implemented yet for GRAPH"));
494 else if (lex_match_id (lexer, "MISSING"))
496 lex_match (lexer, T_EQUALS);
498 while (lex_token (lexer) != T_ENDCMD
499 && lex_token (lexer) != T_SLASH)
501 if (lex_match_id (lexer, "LISTWISE"))
503 graph.missing_pw = false;
505 else if (lex_match_id (lexer, "VARIABLE"))
507 graph.missing_pw = true;
509 else if (lex_match_id (lexer, "EXCLUDE"))
511 graph.dep_excl = MV_ANY;
513 else if (lex_match_id (lexer, "INCLUDE"))
515 graph.dep_excl = MV_SYSTEM;
517 else if (lex_match_id (lexer, "REPORT"))
519 graph.fctr_excl = MV_NEVER;
521 else if (lex_match_id (lexer, "NOREPORT"))
523 graph.fctr_excl = MV_ANY;
527 lex_error (lexer, NULL);
534 lex_error (lexer, NULL);
539 switch (graph.chart_type)
542 /* See scatterplot.h for the setup of the case prototype */
543 graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value - SP_IDX_X*/
544 graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* y value - SP_IDX_Y*/
545 /* The byvar contains the plot categories for the different xy plot colors */
546 if (graph.byvar) /* SP_IDX_BY */
547 graph.gr_proto = caseproto_add_width (graph.gr_proto, var_get_width(graph.byvar));
550 graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value */
551 graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* weight value */
554 lex_error_expecting (lexer,"HISTOGRAM","SCATTERPLOT",NULL);
562 struct casegrouper *grouper;
563 struct casereader *group;
566 grouper = casegrouper_create_splits (proc_open (ds), graph.dict);
567 while (casegrouper_get_next_group (grouper, &group))
568 run_graph (&graph, group);
569 ok = casegrouper_destroy (grouper);
570 ok = proc_commit (ds) && ok;
573 free (graph.dep_vars);
574 pool_destroy (graph.pool);
575 caseproto_unref (graph.gr_proto);
580 caseproto_unref (graph.gr_proto);
581 free (graph.dep_vars);
582 pool_destroy (graph.pool);