From 39d61ddbc7ad08260bd7e0b28da218fb9a6360a4 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 31 Mar 2012 17:06:52 +0200 Subject: [PATCH] Examine: Added the /PLOT=SPREADLEVEL option --- doc/statistics.texi | 20 ++++- src/language/stats/examine.c | 73 +++++++++++++++++ src/output/automake.mk | 5 +- src/output/cairo-chart.h | 3 + src/output/cairo.c | 3 + src/output/charts/spreadlevel-cairo.c | 49 ++++++++++++ src/output/charts/spreadlevel-plot.c | 91 ++++++++++++++++++++++ src/output/charts/spreadlevel-plot.h | 108 ++++++++++++++++++++++++++ tests/output/charts.at | 2 +- 9 files changed, 348 insertions(+), 6 deletions(-) create mode 100644 src/output/charts/spreadlevel-cairo.c create mode 100644 src/output/charts/spreadlevel-plot.c create mode 100644 src/output/charts/spreadlevel-plot.h diff --git a/doc/statistics.texi b/doc/statistics.texi index b5ec8a4b9d..f8a8864da7 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -220,7 +220,7 @@ EXAMINE [ @var{factor3} [BY @var{subfactor3}]] ] /STATISTICS=@{DESCRIPTIVES, EXTREME[(@var{n})], ALL, NONE@} - /PLOT=@{BOXPLOT, NPPLOT, HISTOGRAM, ALL, NONE@} + /PLOT=@{BOXPLOT, NPPLOT, HISTOGRAM, SPREADLEVEL[(@var{t})], ALL, NONE@} /CINTERVAL @var{p} /COMPARE=@{GROUPS,VARIABLES@} /ID=@var{identity_variable} @@ -274,12 +274,24 @@ specified. @cindex boxplot @cindex histogram @cindex npplot +@cindex spreadlevel plot The @subcmd{PLOT} subcommand specifies which plots are to be produced if any. -Available plots are @subcmd{HISTOGRAM}, @subcmd{NPPLOT} and @subcmd{BOXPLOT}. -They can all be used to visualise how closely each cell conforms to a -normal distribution. +Available plots are @subcmd{HISTOGRAM}, @subcmd{NPPLOT}, @subcmd{BOXPLOT} and +@subcmd{SPREADLEVEL}. +The first three can be used to visualise how closely each cell conforms to a +normal distribution, whilst the spread vs.@: level plot can be useful to visualise +how the variance differs between factors. 
Boxplots will also show you the outliers and extreme values. +The @subcmd{SPREADLEVEL} plot displays the interquartile range versus the +median. It takes an optional parameter @var{t}, which specifies how the data +should be transformed prior to plotting. +The given value @var{t} is a power to which the data is raised. For example, if +@var{t} is given as 2, then the data will be squared. +Zero, however, is a special value. If @var{t} is 0 or +is omitted, then the data will be transformed by taking its natural logarithm instead of +raising to the power of @var{t}. + The @subcmd{COMPARE} subcommand is only relevant if producing boxplots, and it is only useful there is more than one dependent variable and at least one factor. If diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 6be74bc8fd..0cef962827 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -52,6 +52,7 @@ #include "output/charts/boxplot.h" #include "output/charts/np-plot.h" +#include "output/charts/spreadlevel-plot.h" #include "output/charts/plot-hist.h" #include "language/command.h" @@ -121,6 +122,8 @@ struct examine bool npplot; bool histogram; bool boxplot; + bool spreadlevel; + int sl_power; enum bp_mode boxplot_mode; @@ -455,6 +458,58 @@ show_npplot (const struct examine *cmd, int iact_idx) } } +static void +show_spreadlevel (const struct examine *cmd, int iact_idx) +{ + const struct interaction *iact = cmd->iacts[iact_idx]; + const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + + int v; + + /* Spreadlevel when there are no levels is not useful */ + if (iact->n_vars == 0) + return; + + for (v = 0; v < cmd->n_dep_vars; ++v) + { + int grp; + struct chart_item *sl; + + struct string label; + ds_init_cstr (&label, + var_to_string (cmd->dep_vars[v])); + + if (iact->n_vars > 0) + { + ds_put_cstr (&label, " ("); + interaction_to_string (iact, &label); + ds_put_cstr (&label, ")"); + } + + sl = spreadlevel_plot_create (ds_cstr (&label), 
cmd->sl_power); + + for (grp = 0; grp < n_cats; ++grp) + { + const struct exploratory_stats *es = + categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); + + double median = percentile_calculate (es[v].quartiles[1], cmd->pc_alg); + + double iqr = percentile_calculate (es[v].quartiles[2], cmd->pc_alg) - + percentile_calculate (es[v].quartiles[0], cmd->pc_alg); + + spreadlevel_plot_add (sl, iqr, median); + } + + if (sl == NULL) /* NOTE(review): SL was already used by spreadlevel_plot_add above, so this test cannot catch a failed create. */ + msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label)); + else + chart_item_submit (sl); + + ds_destroy (&label); + } +} + static void show_histogram (const struct examine *cmd, int iact_idx) @@ -1814,6 +1869,9 @@ run_examine (struct examine *cmd, struct casereader *input) if (cmd->npplot) show_npplot (cmd, i); + if (cmd->spreadlevel) + show_spreadlevel (cmd, i); + if (cmd->descriptives) descriptives_report (cmd, i); } @@ -1864,6 +1922,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.histogram = false; examine.npplot = false; examine.boxplot = false; + examine.spreadlevel = false; + examine.sl_power = 0; examine.dict = dataset_dict (ds); @@ -2091,6 +2151,22 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { examine.histogram = true; } + else if (lex_match_id (lexer, "SPREADLEVEL")) + { + examine.spreadlevel = true; + examine.sl_power = 0; + if (lex_match (lexer, T_LPAREN)) + { + /* lex_integer needs an integer token, so reject anything else with a syntax error first. */ + if (! lex_force_int (lexer)) + goto error; + examine.sl_power = lex_integer (lexer); + + lex_get (lexer); + if (! 
lex_force_match (lexer, T_RPAREN)) + goto error; + } + } else if (lex_match_id (lexer, "NONE")) { examine.histogram = false; diff --git a/src/output/automake.mk b/src/output/automake.mk index 78173e3ef2..49c49e9b28 100644 --- a/src/output/automake.mk +++ b/src/output/automake.mk @@ -20,6 +20,8 @@ src_output_liboutput_la_SOURCES = \ src/output/charts/plot-hist.h \ src/output/charts/roc-chart.c \ src/output/charts/roc-chart.h \ + src/output/charts/spreadlevel-plot.c \ + src/output/charts/spreadlevel-plot.h \ src/output/charts/scree.c \ src/output/charts/scree.h \ src/output/csv.c \ @@ -66,7 +68,8 @@ src_output_liboutput_la_SOURCES += \ src/output/charts/piechart-cairo.c \ src/output/charts/plot-hist-cairo.c \ src/output/charts/roc-chart-cairo.c \ - src/output/charts/scree-cairo.c + src/output/charts/scree-cairo.c \ + src/output/charts/spreadlevel-cairo.c endif if ODF_WRITE_SUPPORT src_output_liboutput_la_SOURCES += src/output/odt.c diff --git a/src/output/cairo-chart.h b/src/output/cairo-chart.h index 5e8123ed1a..090bc9cede 100644 --- a/src/output/cairo-chart.h +++ b/src/output/cairo-chart.h @@ -162,5 +162,8 @@ void xrchart_draw_np_plot (const struct chart_item *, cairo_t *, struct xrchart_geometry *); void xrchart_draw_scree (const struct chart_item *, cairo_t *, struct xrchart_geometry *); +void xrchart_draw_spreadlevel (const struct chart_item *, cairo_t *, + struct xrchart_geometry *); + #endif /* output/cairo-chart.h */ diff --git a/src/output/cairo.c b/src/output/cairo.c index cfa9669329..6229311aa7 100644 --- a/src/output/cairo.c +++ b/src/output/cairo.c @@ -32,6 +32,7 @@ #include "output/charts/piechart.h" #include "output/charts/plot-hist.h" #include "output/charts/roc-chart.h" +#include "output/charts/spreadlevel-plot.h" #include "output/charts/scree.h" #include "output/driver-provider.h" #include "output/message-item.h" @@ -1091,6 +1092,8 @@ xr_draw_chart (const struct chart_item *chart_item, cairo_t *cr, xrchart_draw_roc (chart_item, cr, &geom); else if 
(is_scree (chart_item)) xrchart_draw_scree (chart_item, cr, &geom); + else if (is_spreadlevel_plot_chart (chart_item)) + xrchart_draw_spreadlevel (chart_item, cr, &geom); else NOT_REACHED (); xrchart_geometry_free (cr, &geom); diff --git a/src/output/charts/spreadlevel-cairo.c b/src/output/charts/spreadlevel-cairo.c new file mode 100644 index 0000000000..eeb3c813be --- /dev/null +++ b/src/output/charts/spreadlevel-cairo.c @@ -0,0 +1,55 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "output/charts/spreadlevel-plot.h" + +#include + +#include "output/cairo-chart.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +/* Draws the spread vs. level plot held in CHART_ITEM using CR and GEOM: + title, axis labels, axis scales, then one xrchart_datum call per stored point. */ +void +xrchart_draw_spreadlevel (const struct chart_item *chart_item, cairo_t *cr, + struct xrchart_geometry *geom) +{ + const struct spreadlevel_plot_chart *sl = to_spreadlevel_plot_chart (chart_item); + size_t i; + + const char *name = chart_item_get_title (chart_item); + + xrchart_write_title (cr, geom, _("Spread vs. 
Level Plot of %s"), name); + xrchart_write_xlabel (cr, geom, _("Level")); + xrchart_write_ylabel (cr, geom, _("Spread")); + + /* With no stored point the bounds still hold their inverted DBL_MAX + sentinels, which are not a meaningful axis range, so draw no scales. */ + if (sl->n_data == 0) + return; + + xrchart_write_xscale (cr, geom, sl->x_lower, sl->x_upper, 5); + xrchart_write_yscale (cr, geom, sl->y_lower, sl->y_upper, 5); + + for (i = 0 ; i < sl->n_data; ++i) + { + xrchart_datum (cr, geom, 0, sl->data[i].x, sl->data[i].y); + } +} diff --git a/src/output/charts/spreadlevel-plot.c b/src/output/charts/spreadlevel-plot.c new file mode 100644 index 0000000000..8fcc0bdeeb --- /dev/null +++ b/src/output/charts/spreadlevel-plot.c @@ -0,0 +1,104 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2012 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/ + +#include + +#include "output/charts/spreadlevel-plot.h" + +#include "libpspp/cast.h" +#include "output/chart-item-provider.h" + +#include "gl/xalloc.h" +#include "gl/minmax.h" + +#include +#include + +/* Creates and returns an empty spread vs. level chart labelled LABEL whose + points will be transformed with power TX_PWR (0 selects the natural + logarithm). The axis bounds start inverted so the first point sets them. */ +struct chart_item * +spreadlevel_plot_create (const char *label, double tx_pwr) +{ + struct spreadlevel_plot_chart *sl = xzalloc (sizeof *sl); + chart_item_init (&sl->chart_item, &spreadlevel_plot_chart_class, label); + + sl->x_lower = DBL_MAX; + sl->x_upper = -DBL_MAX; + + sl->y_lower = DBL_MAX; + sl->y_upper = -DBL_MAX; + + sl->tx_pwr = tx_pwr; + + sl->n_data = 0; + sl->data = NULL; + + return &sl->chart_item; +} + +/* Adds the point (LEVEL, SPREAD) to chart CI after transforming both values + with the chart's power (natural logarithm when the power is 0), widening + the axis bounds to include it. Points with no finite transform are dropped. */ +void +spreadlevel_plot_add (struct chart_item *ci, double spread, double level) +{ + struct spreadlevel_plot_chart *sl = to_spreadlevel_plot_chart (ci); + + if ( sl->tx_pwr == 0) + { + spread = log (spread); + level = log (level); + } + else + { + spread = pow (spread, sl->tx_pwr); + level = pow (level, sl->tx_pwr); + } + + /* log of zero or of a negative value, or zero raised to a negative power, + is infinite or NaN. Such a point cannot be plotted and would corrupt + the MIN/MAX bounds below, so it is skipped. 
*/ + if (!isfinite (spread) || !isfinite (level)) + return; + + sl->x_lower = MIN (sl->x_lower, level); + sl->x_upper = MAX (sl->x_upper, level); + + sl->y_lower = MIN (sl->y_lower, spread); + sl->y_upper = MAX (sl->y_upper, spread); + + sl->n_data++; + sl->data = xrealloc (sl->data, sizeof (*sl->data) * sl->n_data); + sl->data[sl->n_data - 1].x = level; + sl->data[sl->n_data - 1].y = spread; +} + + +/* Frees CHART_ITEM's point array and then the chart itself. */ +static void +spreadlevel_plot_chart_destroy (struct chart_item *chart_item) +{ + struct spreadlevel_plot_chart *sl = to_spreadlevel_plot_chart (chart_item); + + free (sl->data); + free (sl); +} + +const struct chart_item_class spreadlevel_plot_chart_class = + { + spreadlevel_plot_chart_destroy + }; diff --git a/src/output/charts/spreadlevel-plot.h b/src/output/charts/spreadlevel-plot.h new file mode 100644 index 0000000000..550ccb9893 --- /dev/null +++ b/src/output/charts/spreadlevel-plot.h @@ -0,0 +1,108 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2012 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef OUTPUT_CHARTS_SL_PLOT_H +#define OUTPUT_CHARTS_SL_PLOT_H 1 + +#include "output/chart-item.h" + +struct datum /* One plotted point. */ +{ + double x; /* Transformed level. */ + double y; /* Transformed spread. */ +}; + +struct spreadlevel_plot_chart + { + struct chart_item chart_item; + + /* Axis bounds, widened as each point is added. */ + double y_lower, y_upper; + double x_lower, x_upper; + + double tx_pwr; /* Power applied to each value; 0 selects the natural logarithm. */ + + size_t n_data; /* Number of elements in DATA. */ + struct datum *data; /* Points added so far. */ + }; +/* Creates an empty chart labelled LABEL; SLP is the transform power. */ +struct chart_item *spreadlevel_plot_create (const char *label, double slp); +/* Transforms SPREAD and LEVEL and adds them to the chart as one point. */ +void spreadlevel_plot_add (struct chart_item *, double spread, double level); + + + +/* This boilerplate for spreadlevel_plot_chart, a subclass of chart_item, was + autogenerated by mk-class-boilerplate. */ + +#include +#include "libpspp/cast.h" + +extern const struct chart_item_class spreadlevel_plot_chart_class; + +/* Returns true if SUPER is a spreadlevel_plot_chart, otherwise false. 
*/ +static inline bool +is_spreadlevel_plot_chart (const struct chart_item *super) +{ + return super->class == &spreadlevel_plot_chart_class; +} + +/* Returns SUPER converted to spreadlevel_plot_chart. SUPER must be a spreadlevel_plot_chart, as + reported by is_spreadlevel_plot_chart. 
*/ +static inline struct spreadlevel_plot_chart * +to_spreadlevel_plot_chart (const struct chart_item *super) +{ + assert (is_spreadlevel_plot_chart (super)); + return UP_CAST (super, struct spreadlevel_plot_chart, chart_item); +} + +/* Returns INSTANCE converted to chart_item. */ +static inline struct chart_item * +spreadlevel_plot_chart_super (const struct spreadlevel_plot_chart *instance) +{ + return CONST_CAST (struct chart_item *, &instance->chart_item); +} + +/* Increments INSTANCE's reference count and returns INSTANCE. */ +static inline struct spreadlevel_plot_chart * +spreadlevel_plot_chart_ref (const struct spreadlevel_plot_chart *instance) +{ + return to_spreadlevel_plot_chart (chart_item_ref (&instance->chart_item)); +} + +/* Decrements INSTANCE's reference count, then destroys INSTANCE if + the reference count is now zero. */ +static inline void +spreadlevel_plot_chart_unref (struct spreadlevel_plot_chart *instance) +{ + chart_item_unref (&instance->chart_item); +} + +/* Returns true if INSTANCE's reference count is greater than 1, + false otherwise. */ +static inline bool +spreadlevel_plot_chart_is_shared (const struct spreadlevel_plot_chart *instance) +{ + return chart_item_is_shared (&instance->chart_item); +} +/* Passes INSTANCE's chart item to chart_item_submit. */ +static inline void +spreadlevel_plot_chart_submit (struct spreadlevel_plot_chart *instance) +{ + chart_item_submit (&instance->chart_item); +} + +#endif diff --git a/tests/output/charts.at b/tests/output/charts.at index e2a43d5286..302e05b151 100644 --- a/tests/output/charts.at +++ b/tests/output/charts.at @@ -19,7 +19,7 @@ end file. end input program. examine x y by a - /plot = histogram, npplot + /plot = histogram, npplot spreadlevel(1) . examine x y by a -- 2.30.2