2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012, 2013, 2015 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 * This module implements the graph command
26 #include <gsl/gsl_cdf.h>
28 #include "libpspp/assertion.h"
29 #include "libpspp/message.h"
30 #include "libpspp/pool.h"
33 #include "data/dataset.h"
34 #include "data/dictionary.h"
35 #include "data/casegrouper.h"
36 #include "data/casereader.h"
37 #include "data/casewriter.h"
38 #include "data/caseproto.h"
39 #include "data/subcase.h"
42 #include "data/format.h"
44 #include "math/chart-geometry.h"
45 #include "math/histogram.h"
46 #include "math/moments.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "output/charts/plot-hist.h"
50 #include "output/charts/scatterplot.h"
52 #include "language/command.h"
53 #include "language/lexer/lexer.h"
54 #include "language/lexer/value-parser.h"
55 #include "language/lexer/variable-parser.h"
57 #include "output/tab.h"
60 #define _(msgid) gettext (msgid)
61 #define N_(msgid) msgid
84 /* Variable index for histogram case */
91 struct exploratory_stats
104 /* The minimum weight */
114 const struct variable **dep_vars;
115 struct exploratory_stats *es;
117 enum mv_class dep_excl;
118 enum mv_class fctr_excl;
120 const struct dictionary *dict;
124 /* ------------ Graph ---------------- */
125 enum chart_type chart_type;
126 enum scatter_type scatter_type;
127 const struct variable *byvar;
128 /* A caseproto that contains the plot data */
129 struct caseproto *gr_proto;
134 show_scatterplot (const struct graph *cmd, struct casereader *input)
137 struct scatterplot_chart *scatterplot;
138 bool byvar_overflow = false;
140 ds_init_empty (&title);
144 ds_put_format (&title, _("%s vs. %s by %s"),
145 var_to_string (cmd->dep_vars[1]),
146 var_to_string (cmd->dep_vars[0]),
147 var_to_string (cmd->byvar));
151 ds_put_format (&title, _("%s vs. %s"),
152 var_to_string (cmd->dep_vars[1]),
153 var_to_string (cmd->dep_vars[0]));
157 scatterplot = scatterplot_create (input,
158 var_to_string(cmd->dep_vars[0]),
159 var_to_string(cmd->dep_vars[1]),
163 cmd->es[0].minimum, cmd->es[0].maximum,
164 cmd->es[1].minimum, cmd->es[1].maximum);
165 scatterplot_chart_submit (scatterplot);
170 msg (MW, _("Maximum number of scatterplot categories reached. "
171 "Your BY variable has too many distinct values. "
172 "The coloring of the plot will not be correct."));
177 show_histogr (const struct graph *cmd, struct casereader *input)
179 struct histogram *histogram;
184 double bin_width = fabs (cmd->es[0].minimum - cmd->es[0].maximum)
185 / (1 + log2 (cmd->es[0].cc))
189 histogram_create (bin_width, cmd->es[0].minimum, cmd->es[0].maximum);
192 if (NULL == histogram)
194 casereader_destroy (input);
198 for (;(c = casereader_read (input)) != NULL; case_unref (c))
200 const double x = case_data_idx (c, HG_IDX_X)->f;
201 const double weight = case_data_idx (c, HG_IDX_WT)->f;
202 moments_pass_two (cmd->es[0].mom, x, weight);
203 histogram_add (histogram, x, weight);
205 casereader_destroy (input);
213 ds_init_cstr (&label,
214 var_to_string (cmd->dep_vars[0]));
216 moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, NULL);
219 ( histogram_chart_create (histogram->gsl_hist,
220 ds_cstr (&label), n, mean,
223 statistic_destroy (&histogram->parent);
229 cleanup_exploratory_stats (struct graph *cmd)
233 for (v = 0; v < cmd->n_dep_vars; ++v)
235 moments_destroy (cmd->es[v].mom);
241 run_graph (struct graph *cmd, struct casereader *input)
244 struct casereader *reader;
245 struct casewriter *writer;
247 cmd->es = pool_calloc (cmd->pool,cmd->n_dep_vars,sizeof(struct exploratory_stats));
248 for(int v=0;v<cmd->n_dep_vars;v++)
250 cmd->es[v].mom = moments_create (MOMENT_KURTOSIS);
251 cmd->es[v].cmin = DBL_MAX;
252 cmd->es[v].maximum = -DBL_MAX;
253 cmd->es[v].minimum = DBL_MAX;
255 /* Always remove cases listwise. This is correct for */
256 /* the histogram because there is only one variable */
257 /* and a simple bivariate scatterplot */
258 /* if ( cmd->missing_pw == false) */
259 input = casereader_create_filter_missing (input,
266 writer = autopaging_writer_create (cmd->gr_proto);
268 /* The case data is copied to a new writer */
269 /* The setup of the case depends on the Charttype */
270 /* For Scatterplot x is assumed in dep_vars[0] */
271 /* y is assumed in dep_vars[1] */
272 /* For Histogram x is assumed in dep_vars[0] */
273 assert(SP_IDX_X == 0 && SP_IDX_Y == 1 && HG_IDX_X == 0);
275 for (;(c = casereader_read (input)) != NULL; case_unref (c))
277 struct ccase *outcase = case_create (cmd->gr_proto);
278 const double weight = dict_get_case_weight (cmd->dict,c,NULL);
279 if (cmd->chart_type == CT_HISTOGRAM)
280 case_data_rw_idx (outcase, HG_IDX_WT)->f = weight;
281 if (cmd->chart_type == CT_SCATTERPLOT && cmd->byvar)
282 value_copy (case_data_rw_idx (outcase, SP_IDX_BY),
283 case_data (c, cmd->byvar),
284 var_get_width (cmd->byvar));
285 for(int v=0;v<cmd->n_dep_vars;v++)
287 const struct variable *var = cmd->dep_vars[v];
288 const double x = case_data (c, var)->f;
290 if (var_is_value_missing (var, case_data (c, var), cmd->dep_excl))
292 cmd->es[v].missing += weight;
295 /* Magically v value fits to SP_IDX_X, SP_IDX_Y, HG_IDX_X */
296 case_data_rw_idx (outcase, v)->f = x;
298 if (x > cmd->es[v].maximum)
299 cmd->es[v].maximum = x;
301 if (x < cmd->es[v].minimum)
302 cmd->es[v].minimum = x;
304 cmd->es[v].non_missing += weight;
306 moments_pass_one (cmd->es[v].mom, x, weight);
308 cmd->es[v].cc += weight;
310 if (cmd->es[v].cmin > weight)
311 cmd->es[v].cmin = weight;
313 casewriter_write (writer,outcase);
316 reader = casewriter_make_reader (writer);
318 switch (cmd->chart_type)
321 show_histogr (cmd,reader);
324 show_scatterplot (cmd,reader);
331 casereader_destroy (input);
332 cleanup_exploratory_stats (cmd);
337 cmd_graph (struct lexer *lexer, struct dataset *ds)
341 graph.missing_pw = false;
343 graph.pool = pool_create ();
345 graph.dep_excl = MV_ANY;
346 graph.fctr_excl = MV_ANY;
348 graph.dict = dataset_dict (ds);
351 /* ---------------- graph ------------------ */
352 graph.dep_vars = NULL;
353 graph.chart_type = CT_NONE;
354 graph.scatter_type = ST_BIVARIATE;
356 graph.gr_proto = caseproto_create ();
358 while (lex_token (lexer) != T_ENDCMD)
360 lex_match (lexer, T_SLASH);
362 if (lex_match_id (lexer, "HISTOGRAM"))
364 if (graph.chart_type != CT_NONE)
366 lex_error (lexer, _("Only one chart type is allowed."));
369 if (!lex_force_match (lexer, T_EQUALS))
371 graph.chart_type = CT_HISTOGRAM;
372 if (!parse_variables_const (lexer, graph.dict,
373 &graph.dep_vars, &graph.n_dep_vars,
374 PV_NO_DUPLICATE | PV_NUMERIC))
376 if (graph.n_dep_vars > 1)
378 lex_error (lexer, _("Only one variable is allowed."));
382 else if (lex_match_id (lexer, "SCATTERPLOT"))
384 if (graph.chart_type != CT_NONE)
386 lex_error (lexer, _("Only one chart type is allowed."));
389 graph.chart_type = CT_SCATTERPLOT;
390 if (lex_match (lexer, T_LPAREN))
392 if (lex_match_id (lexer, "BIVARIATE"))
394 /* This is the default anyway */
396 else if (lex_match_id (lexer, "OVERLAY"))
398 lex_error (lexer, _("%s is not yet implemented."),"OVERLAY");
401 else if (lex_match_id (lexer, "MATRIX"))
403 lex_error (lexer, _("%s is not yet implemented."),"MATRIX");
406 else if (lex_match_id (lexer, "XYZ"))
408 lex_error(lexer, _("%s is not yet implemented."),"XYZ");
413 lex_error_expecting (lexer, "BIVARIATE", NULL);
416 if (!lex_force_match (lexer, T_RPAREN))
419 if (!lex_force_match (lexer, T_EQUALS))
422 if (!parse_variables_const (lexer, graph.dict,
423 &graph.dep_vars, &graph.n_dep_vars,
424 PV_NO_DUPLICATE | PV_NUMERIC))
427 if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 1)
429 lex_error(lexer, _("Only one variable is allowed."));
433 if (!lex_force_match (lexer, T_WITH))
436 if (!parse_variables_const (lexer, graph.dict,
437 &graph.dep_vars, &graph.n_dep_vars,
438 PV_NO_DUPLICATE | PV_NUMERIC | PV_APPEND))
441 if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 2)
443 lex_error (lexer, _("Only one variable is allowed."));
447 if (lex_match (lexer, T_BY))
449 const struct variable *v = NULL;
450 if (!lex_match_variable (lexer,graph.dict,&v))
452 lex_error (lexer, _("Variable expected"));
458 else if (lex_match_id (lexer, "BAR"))
460 lex_error (lexer, _("%s is not yet implemented."),"BAR");
463 else if (lex_match_id (lexer, "LINE"))
465 lex_error (lexer, _("%s is not yet implemented."),"LINE");
468 else if (lex_match_id (lexer, "PIE"))
470 lex_error (lexer, _("%s is not yet implemented."),"PIE");
473 else if (lex_match_id (lexer, "ERRORBAR"))
475 lex_error (lexer, _("%s is not yet implemented."),"ERRORBAR");
478 else if (lex_match_id (lexer, "PARETO"))
480 lex_error (lexer, _("%s is not yet implemented."),"PARETO");
483 else if (lex_match_id (lexer, "TITLE"))
485 lex_error (lexer, _("%s is not yet implemented."),"TITLE");
488 else if (lex_match_id (lexer, "SUBTITLE"))
490 lex_error (lexer, _("%s is not yet implemented."),"SUBTITLE");
493 else if (lex_match_id (lexer, "FOOTNOTE"))
495 lex_error (lexer, _("%s is not yet implemented."),"FOOTNOTE");
496 lex_error (lexer, _("FOOTNOTE is not implemented yet for GRAPH"));
499 else if (lex_match_id (lexer, "MISSING"))
501 lex_match (lexer, T_EQUALS);
503 while (lex_token (lexer) != T_ENDCMD
504 && lex_token (lexer) != T_SLASH)
506 if (lex_match_id (lexer, "LISTWISE"))
508 graph.missing_pw = false;
510 else if (lex_match_id (lexer, "VARIABLE"))
512 graph.missing_pw = true;
514 else if (lex_match_id (lexer, "EXCLUDE"))
516 graph.dep_excl = MV_ANY;
518 else if (lex_match_id (lexer, "INCLUDE"))
520 graph.dep_excl = MV_SYSTEM;
522 else if (lex_match_id (lexer, "REPORT"))
524 graph.fctr_excl = MV_NEVER;
526 else if (lex_match_id (lexer, "NOREPORT"))
528 graph.fctr_excl = MV_ANY;
532 lex_error (lexer, NULL);
539 lex_error (lexer, NULL);
544 switch (graph.chart_type)
547 /* See scatterplot.h for the setup of the case prototype */
548 graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value - SP_IDX_X*/
549 graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* y value - SP_IDX_Y*/
550 /* The byvar contains the plot categories for the different xy plot colors */
551 if (graph.byvar) /* SP_IDX_BY */
552 graph.gr_proto = caseproto_add_width (graph.gr_proto, var_get_width(graph.byvar));
555 graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* x value */
556 graph.gr_proto = caseproto_add_width (graph.gr_proto, 0); /* weight value */
559 lex_error_expecting (lexer,"HISTOGRAM","SCATTERPLOT",NULL);
567 struct casegrouper *grouper;
568 struct casereader *group;
571 grouper = casegrouper_create_splits (proc_open (ds), graph.dict);
572 while (casegrouper_get_next_group (grouper, &group))
573 run_graph (&graph, group);
574 ok = casegrouper_destroy (grouper);
575 ok = proc_commit (ds) && ok;
578 free (graph.dep_vars);
579 pool_destroy (graph.pool);
580 caseproto_unref (graph.gr_proto);
585 caseproto_unref (graph.gr_proto);
586 free (graph.dep_vars);
587 pool_destroy (graph.pool);