From: Friedrich Beckmann Date: Sun, 28 Sep 2014 04:32:17 +0000 (-0700) Subject: Add GRAPH command initially with just scatterplots and histograms. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=80595f6de2ea8f0b64756d76c7ac3c89a1abbbb8 Add GRAPH command initially with just scatterplots and histograms. --- diff --git a/AUTHORS b/AUTHORS index 9350975c73..20998f08eb 100644 --- a/AUTHORS +++ b/AUTHORS @@ -16,6 +16,8 @@ is also an important contributor to GSL, which is used by PSPP. * Mehmet Hakan Satman wrote the QUICK CLUSTER command. +* Friedrich Beckmann wrote the GRAPH command. + We also thank past contributors: * John Williams wrote an initial draft of the T-TEST procedure. diff --git a/NEWS b/NEWS index f605aaab1e..cc24a72365 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,11 @@ See the end for copying conditions. Please send PSPP bug reports to bug-gnu-pspp@gnu.org. +Changes since 0.8.4: + + * The GRAPH command is now available. Initially it supports + scatterplots and histograms. + Changes from 0.8.3 to 0.8.4: * Formatting of SYSFILE INFO output was made easier to read. diff --git a/doc/statistics.texi b/doc/statistics.texi index 3a4302873b..7a880d15ab 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -8,6 +8,7 @@ far. * DESCRIPTIVES:: Descriptive statistics. * FREQUENCIES:: Frequency tables. * EXAMINE:: Testing data for normality. +* GRAPH:: Plot data. * CORRELATIONS:: Correlation tables. * CROSSTABS:: Crosstabulation tables. * FACTOR:: Factor analysis and Principal Components analysis. @@ -375,6 +376,52 @@ specified for which there are many distinct values, then @cmd{EXAMINE} will produce a very large quantity of output. 
+@node GRAPH +@section GRAPH + +@vindex GRAPH +@cindex Exploratory data analysis +@cindex normality, testing + +@display +GRAPH + /HISTOGRAM = @var{var} + /SCATTERPLOT [(BIVARIATE)] = @var{var1} WITH @var{var2} [BY @var{var3}] + [ /MISSING=@{LISTWISE, VARIABLE@} [@{EXCLUDE, INCLUDE@}] ] + [@{NOREPORT,REPORT@}] + +@end display + +The @cmd{GRAPH} command produces graphical plots of data. Only one of the subcommands +@subcmd{HISTOGRAM} or @subcmd{SCATTERPLOT} can be specified, i.e. only one plot +can be produced per call of @cmd{GRAPH}. The @subcmd{MISSING} subcommand is optional. + +@cindex scatterplot + +The subcommand @subcmd{SCATTERPLOT} produces an xy plot of the data. The different +values of the optional third variable @var{var3} will result in different colours and/or +markers for the plot. The following is an example for producing a scatterplot. + +@example +GRAPH + /SCATTERPLOT = @var{height} WITH @var{weight} BY @var{gender}. +@end example + +This example will produce a scatterplot where height is plotted versus weight. Depending +on the value of the gender variable, the colour of the datapoint is different. With +this plot it is possible to analyze gender differences for the height vs. weight relation. + +@cindex histogram + +The subcommand @subcmd{HISTOGRAM} produces a histogram. Only one variable is allowed for +the histogram plot. For an alternative method to produce histograms @pxref{EXAMINE}. The +following example produces a histogram plot for variable weight. + +@example +GRAPH + /HISTOGRAM = @var{weight}. 
+@end example + @node CORRELATIONS @section CORRELATIONS diff --git a/src/language/command.def b/src/language/command.def index 8fe737218f..c7b6325073 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -122,6 +122,7 @@ DEF_CMD (S_DATA, 0, "FILTER", cmd_filter) DEF_CMD (S_DATA, 0, "FLIP", cmd_flip) DEF_CMD (S_DATA, 0, "FREQUENCIES", cmd_frequencies) DEF_CMD (S_DATA, 0, "GLM", cmd_glm) +DEF_CMD (S_DATA, 0, "GRAPH", cmd_graph) DEF_CMD (S_DATA, 0, "LIST", cmd_list) DEF_CMD (S_DATA, 0, "LOGISTIC REGRESSION", cmd_logistic) DEF_CMD (S_DATA, 0, "MEANS", cmd_means) @@ -194,7 +195,6 @@ UNIMPL_CMD ("FIT", "Goodness of Fit") UNIMPL_CMD ("GENLOG", "Categorical model fitting") UNIMPL_CMD ("GET TRANSLATE", "Read other file formats") UNIMPL_CMD ("GGRAPH", "Custom defined graphs") -UNIMPL_CMD ("GRAPH", "Draw graphs") UNIMPL_CMD ("HILOGLINEAR", "Hierarchical loglinear models") UNIMPL_CMD ("HOMALS", "Homogeneity analysis") UNIMPL_CMD ("IGRAPH", "Interactive graphs") diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk index d923a7d134..bfe379e345 100644 --- a/src/language/stats/automake.mk +++ b/src/language/stats/automake.mk @@ -26,6 +26,7 @@ language_stats_sources = \ src/language/stats/friedman.c \ src/language/stats/friedman.h \ src/language/stats/glm.c \ + src/language/stats/graph.c \ src/language/stats/kruskal-wallis.c \ src/language/stats/kruskal-wallis.h \ src/language/stats/ks-one-sample.c \ diff --git a/src/language/stats/graph.c b/src/language/stats/graph.c new file mode 100644 index 0000000000..7bfbbc7e10 --- /dev/null +++ b/src/language/stats/graph.c @@ -0,0 +1,540 @@ +/* + PSPP - a program for statistical analysis. + Copyright (C) 2012, 2013 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* + * This module implements the graph command + */ + +#include + +#include +#include + +#include "libpspp/assertion.h" +#include "libpspp/message.h" +#include "libpspp/pool.h" + + +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/caseproto.h" +#include "data/subcase.h" + + +#include "data/format.h" + +#include "math/chart-geometry.h" +#include "math/histogram.h" +#include "math/moments.h" +#include "math/sort.h" +#include "math/order-stats.h" +#include "output/charts/plot-hist.h" +#include "output/charts/scatterplot.h" + +#include "language/command.h" +#include "language/lexer/lexer.h" +#include "language/lexer/value-parser.h" +#include "language/lexer/variable-parser.h" + +#include "output/tab.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +enum chart_type + { + CT_NONE, + CT_BAR, + CT_LINE, + CT_PIE, + CT_ERRORBAR, + CT_HILO, + CT_HISTOGRAM, + CT_SCATTERPLOT, + CT_PARETO + }; + +enum scatter_type + { + ST_BIVARIATE, + ST_OVERLAY, + ST_MATRIX, + ST_XYZ + }; + +struct exploratory_stats +{ + double missing; + double non_missing; + + struct moments *mom; + + double minimum; + double maximum; + + /* Total weight */ + double cc; + + /* The minimum weight */ + double cmin; +}; + + +struct graph +{ + struct pool *pool; + + size_t n_dep_vars; + const struct variable **dep_vars; + struct exploratory_stats *es; + + enum mv_class dep_excl; + enum mv_class fctr_excl; + + const struct dictionary *dict; + + bool missing_pw; + + /* ------------ Graph ---------------- */ 
+ enum chart_type chart_type; + enum scatter_type scatter_type; + const struct variable *byvar; +}; + + +static void +show_scatterplot (const struct graph *cmd, const struct casereader *input) +{ + struct string title; + struct scatterplot_chart *scatterplot; + bool byvar_overflow = false; + + ds_init_cstr (&title, var_to_string (cmd->dep_vars[0])); + ds_put_cstr (&title, " vs "); + ds_put_cstr (&title, var_to_string (cmd->dep_vars[1])); + if (cmd->byvar) + { + ds_put_cstr (&title, " by "); + ds_put_cstr (&title, var_to_string (cmd->byvar)); + } + + scatterplot = scatterplot_create(input, + cmd->dep_vars[0], + cmd->dep_vars[1], + cmd->byvar, + &byvar_overflow, + ds_cstr (&title), + cmd->es[0].minimum, cmd->es[0].maximum, + cmd->es[1].minimum, cmd->es[1].maximum); + scatterplot_chart_submit(scatterplot); + ds_destroy(&title); + + if (byvar_overflow) + { + msg (MW, _("Maximum number of scatterplot categories reached." + "Your BY variable has too many distinct values." + "The colouring of the plot will not be correct")); + } + + +} + +static void +show_histogr (const struct graph *cmd, const struct casereader *input) +{ + struct histogram *histogram; + struct ccase *c; + struct casereader *reader; + + { + /* Sturges Rule */ + double bin_width = fabs (cmd->es[0].minimum - cmd->es[0].maximum) + / (1 + log2 (cmd->es[0].cc)) + ; + + histogram = + histogram_create (bin_width, cmd->es[0].minimum, cmd->es[0].maximum); + } + + + for (reader=casereader_clone(input);(c = casereader_read (reader)) != NULL; case_unref (c)) + { + const struct variable *var = cmd->dep_vars[0]; + const double x = case_data (c, var)->f; + const double weight = dict_get_case_weight(cmd->dict,c,NULL); + moments_pass_two (cmd->es[0].mom, x, weight); + histogram_add (histogram, x, weight); + } + casereader_destroy(reader); + + + { + double n, mean, var; + + struct string label; + + ds_init_cstr (&label, + var_to_string (cmd->dep_vars[0])); + + moments_calculate (cmd->es[0].mom, &n, &mean, &var, NULL, 
NULL); + + chart_item_submit + ( histogram_chart_create (histogram->gsl_hist, + ds_cstr (&label), n, mean, + sqrt (var), false)); + + statistic_destroy(&histogram->parent); + ds_destroy (&label); + } +} + +static void +cleanup_exploratory_stats (struct graph *cmd) +{ + int v; + + for (v = 0; v < cmd->n_dep_vars; ++v) + { + moments_destroy (cmd->es[v].mom); + } +} + + +static void +run_graph (struct graph *cmd, struct casereader *input) +{ + struct ccase *c; + struct casereader *reader; + + + cmd->es = pool_calloc(cmd->pool,cmd->n_dep_vars,sizeof(struct exploratory_stats)); + for(int v=0;vn_dep_vars;v++) + { + cmd->es[v].mom = moments_create (MOMENT_KURTOSIS); + cmd->es[v].cmin = DBL_MAX; + cmd->es[v].maximum = -DBL_MAX; + cmd->es[v].minimum = DBL_MAX; + } + /* Always remove cases listwise. This is correct for */ + /* the histogram because there is only one variable */ + /* and a simple bivariate scatterplot */ + /* if ( cmd->missing_pw == false) */ + input = casereader_create_filter_missing (input, + cmd->dep_vars, + cmd->n_dep_vars, + cmd->dep_excl, + NULL, + NULL); + + for (reader = casereader_clone (input); + (c = casereader_read (reader)) != NULL; case_unref (c)) + { + const double weight = dict_get_case_weight(cmd->dict,c,NULL); + for(int v=0;vn_dep_vars;v++) + { + const struct variable *var = cmd->dep_vars[v]; + const double x = case_data (c, var)->f; + + if (var_is_value_missing (var, case_data (c, var), cmd->dep_excl)) + { + cmd->es[v].missing += weight; + continue; + } + + if (x > cmd->es[v].maximum) + cmd->es[v].maximum = x; + + if (x < cmd->es[v].minimum) + cmd->es[v].minimum = x; + + cmd->es[v].non_missing += weight; + + moments_pass_one (cmd->es[v].mom, x, weight); + + cmd->es[v].cc += weight; + + if (cmd->es[v].cmin > weight) + cmd->es[v].cmin = weight; + } + } + casereader_destroy (reader); + + switch (cmd->chart_type) + { + case CT_HISTOGRAM: + reader = casereader_clone(input); + show_histogr(cmd,reader); + casereader_destroy(reader); + break; + 
case CT_SCATTERPLOT: + reader = casereader_clone(input); + show_scatterplot(cmd,reader); + casereader_destroy(reader); + break; + default: + NOT_REACHED (); + break; + }; + + casereader_destroy(input); + + cleanup_exploratory_stats (cmd); +} + + +int +cmd_graph (struct lexer *lexer, struct dataset *ds) +{ + struct graph graph; + + graph.missing_pw = false; + + graph.pool = pool_create (); + + graph.dep_excl = MV_ANY; + graph.fctr_excl = MV_ANY; + + graph.dict = dataset_dict (ds); + + + /* ---------------- graph ------------------ */ + graph.dep_vars = NULL; + graph.chart_type = CT_NONE; + graph.scatter_type = ST_BIVARIATE; + graph.byvar = NULL; + + while (lex_token (lexer) != T_ENDCMD) + { + lex_match (lexer, T_SLASH); + + if (lex_match_id(lexer, "HISTOGRAM")) + { + if (graph.chart_type != CT_NONE) + { + lex_error(lexer, _("Only one chart type is allowed.")); + goto error; + } + if (!lex_force_match (lexer, T_EQUALS)) + goto error; + graph.chart_type = CT_HISTOGRAM; + if (!parse_variables_const (lexer, graph.dict, + &graph.dep_vars, &graph.n_dep_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + goto error; + if (graph.n_dep_vars > 1) + { + lex_error(lexer, _("Only one variable allowed")); + goto error; + } + } + else if (lex_match_id (lexer, "SCATTERPLOT")) + { + if (graph.chart_type != CT_NONE) + { + lex_error(lexer, _("Only one chart type is allowed.")); + goto error; + } + graph.chart_type = CT_SCATTERPLOT; + if (lex_match (lexer, T_LPAREN)) + { + if (lex_match_id (lexer, "BIVARIATE")) + { + /* This is the default anyway */ + } + else if (lex_match_id (lexer, "OVERLAY")) + { + lex_error(lexer, _("%s is not yet implemented."),"OVERLAY"); + goto error; + } + else if (lex_match_id (lexer, "MATRIX")) + { + lex_error(lexer, _("%s is not yet implemented."),"MATRIX"); + goto error; + } + else if (lex_match_id (lexer, "XYZ")) + { + lex_error(lexer, _("%s is not yet implemented."),"XYZ"); + goto error; + } + else + { + lex_error_expecting(lexer, "BIVARIATE", NULL); + goto error; 
+ } + if (!lex_force_match (lexer, T_RPAREN)) + goto error; + } + if (!lex_force_match (lexer, T_EQUALS)) + goto error; + + if (!parse_variables_const (lexer, graph.dict, + &graph.dep_vars, &graph.n_dep_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + goto error; + + if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 1) + { + lex_error(lexer, _("Only one variable allowed")); + goto error; + } + + if (!lex_force_match (lexer, T_WITH)) + goto error; + + if (!parse_variables_const (lexer, graph.dict, + &graph.dep_vars, &graph.n_dep_vars, + PV_NO_DUPLICATE | PV_NUMERIC | PV_APPEND)) + goto error; + + if (graph.scatter_type == ST_BIVARIATE && graph.n_dep_vars != 2) + { + lex_error(lexer, _("Only one variable allowed")); + goto error; + } + + if (lex_match(lexer, T_BY)) + { + const struct variable *v = NULL; + if (!lex_match_variable (lexer,graph.dict,&v)) + { + lex_error(lexer, _("Variable expected")); + goto error; + } + graph.byvar = v; + } + } + else if (lex_match_id (lexer, "BAR")) + { + lex_error (lexer, _("%s is not yet implemented."),"BAR"); + goto error; + } + else if (lex_match_id (lexer, "LINE")) + { + lex_error (lexer, _("%s is not yet implemented."),"LINE"); + goto error; + } + else if (lex_match_id (lexer, "PIE")) + { + lex_error (lexer, _("%s is not yet implemented."),"PIE"); + goto error; + } + else if (lex_match_id (lexer, "ERRORBAR")) + { + lex_error (lexer, _("%s is not yet implemented."),"ERRORBAR"); + goto error; + } + else if (lex_match_id (lexer, "PARETO")) + { + lex_error (lexer, _("%s is not yet implemented."),"PARETO"); + goto error; + } + else if (lex_match_id (lexer, "TITLE")) + { + lex_error (lexer, _("%s is not yet implemented."),"TITLE"); + goto error; + } + else if (lex_match_id (lexer, "SUBTITLE")) + { + lex_error (lexer, _("%s is not yet implemented."),"SUBTITLE"); + goto error; + } + else if (lex_match_id (lexer, "FOOTNOTE")) + { + lex_error (lexer, _("%s is not yet implemented."),"FOOTNOTE"); + lex_error (lexer, _("FOOTNOTE is not 
implemented yet for GRAPH")); + goto error; + } + else if (lex_match_id (lexer, "MISSING")) + { + lex_match (lexer, T_EQUALS); + + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { + if (lex_match_id (lexer, "LISTWISE")) + { + graph.missing_pw = false; + } + else if (lex_match_id (lexer, "VARIABLE")) + { + graph.missing_pw = true; + } + else if (lex_match_id (lexer, "EXCLUDE")) + { + graph.dep_excl = MV_ANY; + } + else if (lex_match_id (lexer, "INCLUDE")) + { + graph.dep_excl = MV_SYSTEM; + } + else if (lex_match_id (lexer, "REPORT")) + { + graph.fctr_excl = MV_NEVER; + } + else if (lex_match_id (lexer, "NOREPORT")) + { + graph.fctr_excl = MV_ANY; + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + } + else + { + lex_error (lexer, NULL); + goto error; + } + } + + if (graph.chart_type == CT_NONE) + { + lex_error_expecting(lexer,"HISTOGRAM","SCATTERPLOT",NULL); + goto error; + } + + + { + struct casegrouper *grouper; + struct casereader *group; + bool ok; + + grouper = casegrouper_create_splits (proc_open (ds), graph.dict); + while (casegrouper_get_next_group (grouper, &group)) + run_graph (&graph, group); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + } + + free (graph.dep_vars); + pool_destroy (graph.pool); + + return CMD_SUCCESS; + + error: + free (graph.dep_vars); + pool_destroy (graph.pool); + + return CMD_FAILURE; +} diff --git a/src/output/automake.mk b/src/output/automake.mk index 0b3e1fd78c..39c52d7496 100644 --- a/src/output/automake.mk +++ b/src/output/automake.mk @@ -24,6 +24,8 @@ src_output_liboutput_la_SOURCES = \ src/output/charts/spreadlevel-plot.h \ src/output/charts/scree.c \ src/output/charts/scree.h \ + src/output/charts/scatterplot.c \ + src/output/charts/scatterplot.h \ src/output/csv.c \ src/output/driver-provider.h \ src/output/driver.c \ @@ -70,7 +72,8 @@ src_output_liboutput_la_SOURCES += \ src/output/charts/plot-hist-cairo.c \ src/output/charts/roc-chart-cairo.c \ 
src/output/charts/scree-cairo.c \ - src/output/charts/spreadlevel-cairo.c + src/output/charts/spreadlevel-cairo.c \ + src/output/charts/scatterplot-cairo.c endif if ODF_WRITE_SUPPORT src_output_liboutput_la_SOURCES += src/output/odt.c diff --git a/src/output/cairo-chart.h b/src/output/cairo-chart.h index 646b1cc3d7..edf4ed1148 100644 --- a/src/output/cairo-chart.h +++ b/src/output/cairo-chart.h @@ -173,6 +173,8 @@ void xrchart_draw_scree (const struct chart_item *, cairo_t *, struct xrchart_geometry *); void xrchart_draw_spreadlevel (const struct chart_item *, cairo_t *, struct xrchart_geometry *); +void xrchart_draw_scatterplot (const struct chart_item *, cairo_t *, + struct xrchart_geometry *); #endif /* output/cairo-chart.h */ diff --git a/src/output/cairo.c b/src/output/cairo.c index 5197ffee9a..9a0cb6797e 100644 --- a/src/output/cairo.c +++ b/src/output/cairo.c @@ -34,6 +34,7 @@ #include "output/charts/roc-chart.h" #include "output/charts/spreadlevel-plot.h" #include "output/charts/scree.h" +#include "output/charts/scatterplot.h" #include "output/driver-provider.h" #include "output/message-item.h" #include "output/options.h" @@ -1420,6 +1421,8 @@ xr_draw_chart (const struct chart_item *chart_item, cairo_t *cr, xrchart_draw_scree (chart_item, cr, &geom); else if (is_spreadlevel_plot_chart (chart_item)) xrchart_draw_spreadlevel (chart_item, cr, &geom); + else if (is_scatterplot_chart (chart_item)) + xrchart_draw_scatterplot (chart_item, cr, &geom); else NOT_REACHED (); xrchart_geometry_free (cr, &geom); diff --git a/src/output/charts/scatterplot-cairo.c b/src/output/charts/scatterplot-cairo.c new file mode 100644 index 0000000000..b555a12088 --- /dev/null +++ b/src/output/charts/scatterplot-cairo.c @@ -0,0 +1,117 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2014 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "output/charts/scatterplot.h" + +#include "data/case.h" +#include "data/casereader.h" +#include "data/variable.h" +#include "output/cairo-chart.h" +#include "libpspp/str.h" +#include "libpspp/message.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + + +void +xrchart_draw_scatterplot (const struct chart_item *chart_item, cairo_t *cr, + struct xrchart_geometry *geom) +{ + const struct scatterplot_chart *spc = to_scatterplot_chart (chart_item); + struct casereader *data; + struct ccase *c; + /* While reading the cases, a list with categories of the byvar is build */ + /* All distinct values are stored in catvals */ + /* Each category will later have a different plot colour */ + const int MAX_PLOT_CATS = 20; + union value catvals[MAX_PLOT_CATS]; + int n_catvals = 0; + int byvar_width = 0; + int i = 0; + const struct xrchart_colour *colour; + + if (spc->byvar) + byvar_width = var_get_width(spc->byvar); + + xrchart_write_xscale (cr, geom, + spc->x_min, + spc->x_max, 5); + xrchart_write_yscale (cr, geom, spc->y_min, spc->y_max, 5); + xrchart_write_title (cr, geom, _("Scatterplot %s"), chart_item->title); + xrchart_write_xlabel (cr, geom, var_to_string(spc->xvar)); + xrchart_write_ylabel (cr, geom, var_to_string(spc->yvar)); + + cairo_save (cr); + data = casereader_clone (spc->data); + for (; (c = casereader_read (data)) != NULL; case_unref 
(c)) + { + if (spc->byvar) + { + const union value *val = case_data(c,spc->byvar); + for(i=0;ibyvar,val,MV_ANY)) + ds_put_cstr(&label,"missing"); + else + var_append_value_name(spc->byvar,val,&label); + value_clone(&catvals[n_catvals++],val,byvar_width); + geom->n_datasets++; + geom->dataset = xrealloc (geom->dataset, + geom->n_datasets * sizeof (*geom->dataset)); + + geom->dataset[geom->n_datasets - 1] = strdup(ds_cstr(&label)); + ds_destroy(&label); + } + else /* Use the last plot category */ + { + *(spc->byvar_overflow) = true; + i--; + } + } + } + colour = &data_colour [ i % XRCHART_N_COLOURS]; + cairo_set_source_rgb (cr, + colour->red / 255.0, + colour->green / 255.0, + colour->blue / 255.0); + + xrchart_datum (cr, geom, 0, + case_data (c, spc->xvar)->f, + case_data (c, spc->yvar)->f); + } + casereader_destroy (data); + cairo_restore(cr); + + for(i=0;ibyvar) + xrchart_write_legend(cr, geom); + + + + // xrchart_line (cr, geom, npp->slope, npp->intercept, + // npp->y_first, npp->y_last, XRCHART_DIM_Y); + +} diff --git a/src/output/charts/scatterplot.c b/src/output/charts/scatterplot.c new file mode 100644 index 0000000000..920bc17367 --- /dev/null +++ b/src/output/charts/scatterplot.c @@ -0,0 +1,73 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2014 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#include + +#include "output/charts/scatterplot.h" + +#include + +#include "data/casereader.h" +#include "libpspp/cast.h" +#include "output/chart-item-provider.h" + +#include "gl/minmax.h" + + +/* Creates a scatterplot + + The caller retains ownership of READER. */ +struct scatterplot_chart * +scatterplot_create (const struct casereader *reader, + const struct variable *xvar, + const struct variable *yvar, + const struct variable *byvar, + bool *byvar_overflow, + const char *label, + double xmin, double xmax, double ymin, double ymax) +{ + struct scatterplot_chart *spc; + + spc = xzalloc (sizeof *spc); + chart_item_init (&spc->chart_item, &scatterplot_chart_class, label); + spc->data = casereader_clone (reader); + + spc->y_min = ymin; + spc->y_max = ymax; + + spc->x_min = xmin; + spc->x_max = xmax; + + spc->xvar = xvar; + spc->yvar = yvar; + spc->byvar = byvar; + spc->byvar_overflow = byvar_overflow; + + return spc; +} + +static void +scatterplot_chart_destroy (struct chart_item *chart_item) +{ + struct scatterplot_chart *spc = to_scatterplot_chart (chart_item); + casereader_destroy (spc->data); + free (spc); +} + +const struct chart_item_class scatterplot_chart_class = + { + scatterplot_chart_destroy + }; diff --git a/src/output/charts/scatterplot.h b/src/output/charts/scatterplot.h new file mode 100644 index 0000000000..e95562bbad --- /dev/null +++ b/src/output/charts/scatterplot.h @@ -0,0 +1,106 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2014 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef OUTPUT_CHARTS_SCATTERPLOT_H +#define OUTPUT_CHARTS_SCATTERPLOT_H 1 + +#include "output/chart-item.h" + +/* A scatterplot. */ +struct scatterplot_chart + { + struct chart_item chart_item; + struct casereader *data; + const struct variable *xvar, *yvar, *byvar; + + double y_min, y_max; + double x_min, x_max; + /* If the number of distinct values of byvar */ + /* exceeds a certain limit, the warning flag */ + /* is activated after the chart is drawn */ + bool *byvar_overflow; + }; + +struct scatterplot_chart * +scatterplot_create (const struct casereader *, + const struct variable *, + const struct variable *, + const struct variable *, + bool *, + const char *label, + double xmin, double xmax, double ymin, double ymax); + +/* This boilerplate for scatterplot_chart, a subclass of chart_item, was + autogenerated by mk-class-boilerplate. */ + +#include +#include "libpspp/cast.h" + +extern const struct chart_item_class scatterplot_chart_class; + +/* Returns true if SUPER is a scatterplot_chart, otherwise false. */ +static inline bool +is_scatterplot_chart (const struct chart_item *super) +{ + return super->class == &scatterplot_chart_class; +} + +/* Returns SUPER converted to scatterplot_chart. SUPER must be a scatterplot_chart, as + reported by is_scatterplot_chart. */ +static inline struct scatterplot_chart * +to_scatterplot_chart (const struct chart_item *super) +{ + assert (is_scatterplot_chart (super)); + return UP_CAST (super, struct scatterplot_chart, chart_item); +} + +/* Returns INSTANCE converted to chart_item. */ +static inline struct chart_item * +scatterplot_chart_super (const struct scatterplot_chart *instance) +{ + return CONST_CAST (struct chart_item *, &instance->chart_item); +} + +/* Increments INSTANCE's reference count and returns INSTANCE. 
*/ +static inline struct scatterplot_chart * +scatterplot_chart_ref (const struct scatterplot_chart *instance) +{ + return to_scatterplot_chart (chart_item_ref (&instance->chart_item)); +} + +/* Decrements INSTANCE's reference count, then destroys INSTANCE if + the reference count is now zero. */ +static inline void +scatterplot_chart_unref (struct scatterplot_chart *instance) +{ + chart_item_unref (&instance->chart_item); +} + +/* Returns true if INSTANCE's reference count is greater than 1, + false otherwise. */ +static inline bool +scatterplot_chart_is_shared (const struct scatterplot_chart *instance) +{ + return chart_item_is_shared (&instance->chart_item); +} + +static inline void +scatterplot_chart_submit (struct scatterplot_chart *instance) +{ + chart_item_submit (&instance->chart_item); +} + +#endif /* output/charts/scatterplot.h */ diff --git a/tests/automake.mk b/tests/automake.mk index 7677b4c2b3..b9df0539e1 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -312,6 +312,7 @@ TESTSUITE_AT = \ tests/language/stats/crosstabs.at \ tests/language/stats/descriptives.at \ tests/language/stats/examine.at \ + tests/language/stats/graph.at \ tests/language/stats/factor.at \ tests/language/stats/flip.at \ tests/language/stats/frequencies.at \ diff --git a/tests/language/stats/graph.at b/tests/language/stats/graph.at new file mode 100644 index 0000000000..f4b125b24e --- /dev/null +++ b/tests/language/stats/graph.at @@ -0,0 +1,149 @@ +AT_BANNER([GRAPH]) + +AT_SETUP([GRAPH simple scatterplot]) +AT_DATA([scatterplot.sps],[ +* Simple Scatterplot test +NEW FILE. +INPUT PROGRAM. +LOOP #i = 1 to 100. +COMPUTE Age = RV.NORMAL(40,10). +END CASE. +END LOOP. +END FILE. +END INPUT PROGRAM. + +COMPUTE Size = Age * 3 + 50. + +GRAPH + /SCATTERPLOT(BIVARIATE) = Age WITH Size. + +]) + +AT_CHECK([pspp -O format=csv scatterplot.sps], [0], [ignore]) + +AT_CLEANUP + + +AT_SETUP([GRAPH Scatter and Histogram]) +AT_DATA([scatterlong.sps],[ +NEW FILE. +INPUT PROGRAM. 
+LOOP #i = 1 to 10000. +COMPUTE Age = RV.NORMAL(40,10). +COMPUTE CityNum = TRUNC(UNIFORM(2.95)). +END CASE. +END LOOP. +END FILE. +END INPUT PROGRAM. + +COMPUTE Size = Age * 3 + 50 + 50*CityNum. + +STRING City (a20). + +Recode CityNum + (0 = "Madrid") + (1 = "Paris") + (ELSE = "Stockholm") + into City. + + GRAPH + /SCATTERPLOT(BIVARIATE) = Age WITH Size + + GRAPH + /SCATTERPLOT(BIVARIATE) = Age WITH CityNum + + GRAPH + /SCATTERPLOT = CityNum WITH Age + + GRAPH + /SCATTERPLOT = CityNum WITH Size + + GRAPH + /SCATTERPLOT(BIVARIATE) = Age WITH Size BY City + + GRAPH + /SCATTERPLOT(BIVARIATE) = Age WITH Size BY CityNum + + ADD VALUE LABELS + /CityNum 1 'Rio' 2 'Tokyo' 0 'Mumbai'. + + GRAPH + /SCATTERPLOT(BIVARIATE) = Age WITH Size BY CityNum + + GRAPH + /HISTOGRAM = Age. + +]) + +AT_CHECK([pspp -O format=pdf scatterlong.sps], [0], [ignore]) +AT_CLEANUP + +AT_SETUP([GRAPH missing values don't crash]) +AT_DATA([scatter.sps], [dnl +data list list /x * y *. +begin data. +1 0 +2 0 +. 0 +3 1 +4 1 +5 . +6 1 +end data. +graph + /scatterplot = x with y. +graph + /histogram = x. +]) +AT_CHECK([pspp -o pspp.pdf scatter.sps]) +dnl Ignore output -- this is just a no-crash check. +AT_CLEANUP + +AT_SETUP([GRAPH missing=VARIABLE no crash]) +AT_DATA([scatter.sps], [dnl +data list list /x * y *. +begin data. +1 0 +2 0 +. 0 +3 1 +4 1 +5 . +6 1 +end data. +graph + /scatterplot = x with y + /missing = VARIABLE. +graph + /histogram = x + /missing = VARIABLE. +]) +AT_CHECK([pspp -o pspp.pdf scatter.sps]) +dnl Ignore output -- this is just a no-crash check. +AT_CLEANUP + +AT_SETUP([GRAPH missing value in by variable]) +AT_DATA([scatter.sps], [dnl +data list list /x * y * z *. +begin data. +1 0 9 +2 0 9 +. 0 9 +3 1 . +4 1 8 +5 . 8 +6 1 8 +end data. +graph + /scatterplot = x with y by z + /missing = VARIABLE. + +graph + /scatterplot = x with y by z. + +]) +AT_CHECK([pspp -o pspp.pdf scatter.sps]) +dnl Ignore output -- this is just a no-crash check. +AT_CLEANUP + +