/*
PSPP - a program for statistical analysis.
- Copyright (C) 2012 Free Software Foundation, Inc.
+ Copyright (C) 2012, 2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "output/charts/boxplot.h"
#include "output/charts/np-plot.h"
+#include "output/charts/spreadlevel-plot.h"
#include "output/charts/plot-hist.h"
#include "language/command.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
+/* Appends to STR the name of value VAL of variable VAR, as produced by
+   var_append_value_name.  When var_is_value_missing reports VAL as
+   missing for VAR under class MV_ANY, the translated marker
+   " (missing)" is appended as well. */
+static void
+append_value_name (const struct variable *var, const union value *val, struct string *str)
+{
+  var_append_value_name (var, val, str);
+
+  /* Values that are not missing need no marker. */
+  if (!var_is_value_missing (var, val, MV_ANY))
+    return;
+
+  ds_put_cstr (str, _(" (missing)"));
+}
+
enum bp_mode
{
BP_GROUPS,
size_t n_iacts;
struct interaction **iacts;
- enum mv_class exclude;
+ enum mv_class dep_excl;
+ enum mv_class fctr_excl;
const struct dictionary *dict;
size_t n_percentiles;
bool npplot;
- bool histogram;
+ bool histogramplot;
bool boxplot;
+ bool spreadlevelplot;
+ int sl_power;
enum bp_mode boxplot_mode;
ds_init_empty (&label);
for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
{
+ struct string l;
const struct variable *ivar = iact->vars[ivar_idx];
const union value *val = case_data (c, ivar);
-
- ds_put_cstr (&label, var_to_string (ivar));
- ds_put_cstr (&label, " = ");
- var_append_value_name (ivar, val, &label);
- ds_put_cstr (&label, "; ");
+ ds_init_empty (&l);
+
+ append_value_name (ivar, val, &l);
+ ds_ltrim (&l, ss_cstr (" "));
+
+ ds_put_substring (&label, l.ss);
+ if (ivar_idx < iact->n_vars - 1)
+ ds_put_cstr (&label, "; ");
+
+ ds_destroy (&l);
}
boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label));
ds_put_cstr (&label, var_to_string (ivar));
ds_put_cstr (&label, " = ");
- var_append_value_name (ivar, val, &label);
+ append_value_name (ivar, val, &label);
ds_put_cstr (&label, "; ");
}
ds_put_cstr (&label, var_to_string (ivar));
ds_put_cstr (&label, " = ");
- var_append_value_name (ivar, val, &label);
+ append_value_name (ivar, val, &label);
ds_put_cstr (&label, "; ");
}
}
}
+/* Submits one spreadlevel chart per dependent variable of CMD for the
+   interaction at index IACT_IDX.
+
+   Each chart is titled "DEPVAR (INTERACTION)" and receives one point
+   per category of the interaction.  The point is built from that
+   category's interquartile range (quartiles[2] - quartiles[0]) and its
+   median (quartiles[1]), both computed with CMD's percentile
+   algorithm.
+
+   Does nothing when the interaction contains no variables.  If a chart
+   cannot be created, a warning is issued and that dependent variable
+   is skipped. */
+static void
+show_spreadlevel (const struct examine *cmd, int iact_idx)
+{
+  const struct interaction *iact = cmd->iacts[iact_idx];
+  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
+
+  int v;
+
+  /* Spreadlevel when there are no levels is not useful */
+  if (iact->n_vars == 0)
+    return;
+
+  for (v = 0; v < cmd->n_dep_vars; ++v)
+    {
+      int grp;
+      struct chart_item *sl;
+
+      /* The interaction is known to be non-empty here (see the early
+         return above), so its description is always appended. */
+      struct string label;
+      ds_init_cstr (&label,
+                    var_to_string (cmd->dep_vars[v]));
+      ds_put_cstr (&label, " (");
+      interaction_to_string (iact, &label);
+      ds_put_cstr (&label, ")");
+
+      sl = spreadlevel_plot_create (ds_cstr (&label), cmd->sl_power);
+
+      /* Check the chart before adding any points, so that a failed
+         creation is never handed to spreadlevel_plot_add. */
+      if (sl == NULL)
+        {
+          msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label));
+          ds_destroy (&label);
+          continue;
+        }
+
+      for (grp = 0; grp < n_cats; ++grp)
+        {
+          const struct exploratory_stats *es =
+            categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
+
+          double median = percentile_calculate (es[v].quartiles[1], cmd->pc_alg);
+
+          double iqr = percentile_calculate (es[v].quartiles[2], cmd->pc_alg) -
+            percentile_calculate (es[v].quartiles[0], cmd->pc_alg);
+
+          spreadlevel_plot_add (sl, iqr, median);
+        }
+
+      chart_item_submit (sl);
+
+      ds_destroy (&label);
+    }
+}
+
static void
show_histogram (const struct examine *cmd, int iact_idx)
categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
struct string label;
+
+ if (es[v].histogram == NULL)
+ continue;
+
ds_init_cstr (&label,
var_to_string (cmd->dep_vars[v]));
ds_put_cstr (&label, var_to_string (ivar));
ds_put_cstr (&label, " = ");
- var_append_value_name (ivar, val, &label);
+ append_value_name (ivar, val, &label);
ds_put_cstr (&label, "; ");
}
const int nc = heading_columns + cmd->n_percentiles;
t = tab_create (nc, nr);
+
tab_title (t, _("Percentiles"));
tab_headers (t, heading_columns, 0, heading_rows, 0);
{
struct string str;
ds_init_empty (&str);
- var_append_value_name (ivar, val, &str);
+ append_value_name (ivar, val, &str);
tab_text (t,
1 + ivar_idx,
heading_rows + v * rows_per_var + i * rows_per_cat,
0,
percentile_calculate (es->percentiles[p], cmd->pc_alg),
- 0);
+ NULL, RC_OTHER);
if (cmd->ptiles[p] == 25.0)
{
heading_rows + v * rows_per_var + i * rows_per_cat + 1,
0,
hinges[0],
- 0);
+ NULL, RC_OTHER);
}
else if (cmd->ptiles[p] == 50.0)
{
heading_rows + v * rows_per_var + i * rows_per_cat + 1,
0,
hinges[1],
- 0);
+ NULL, RC_OTHER);
}
else if (cmd->ptiles[p] == 75.0)
{
heading_rows + v * rows_per_var + i * rows_per_cat + 1,
0,
hinges[2],
- 0);
+ NULL, RC_OTHER);
}
}
const int nc = 2 + heading_columns;
t = tab_create (nc, nr);
+
tab_title (t, _("Descriptives"));
tab_headers (t, heading_columns, 0, heading_rows, 0);
{
struct string str;
ds_init_empty (&str);
- var_append_value_name (ivar, val, &str);
+ append_value_name (ivar, val, &str);
tab_text (t,
1 + ivar_idx,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat,
- 0, m1, 0);
+ 0, m1, NULL, RC_OTHER);
tab_double (t,
1 + iact->n_vars + 3,
heading_rows + v * rows_per_var + i * rows_per_cat,
- 0, calc_semean (m2, m0), 0);
+ 0, calc_semean (m2, m0), NULL, RC_OTHER);
tab_text_format (t,
1 + iact->n_vars,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 1,
- 0, m1 - tval * calc_semean (m2, m0), 0);
+ 0, m1 - tval * calc_semean (m2, m0), NULL, RC_OTHER);
tab_text (t,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 2,
- 0, m1 + tval * calc_semean (m2, m0), 0);
+ 0, m1 + tval * calc_semean (m2, m0), NULL, RC_OTHER);
tab_text (t,
heading_rows + v * rows_per_var + i * rows_per_cat + 3,
0,
trimmed_mean_calculate (es->trimmed_mean),
- 0);
+ NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
heading_rows + v * rows_per_var + i * rows_per_cat + 4,
0,
percentile_calculate (es->quartiles[1], cmd->pc_alg),
- 0);
+ NULL, RC_OTHER);
tab_text (t,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 5,
- 0, m2, 0);
+ 0, m2, NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 6,
- 0, sqrt (m2), 0);
+ 0, sqrt (m2), NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
heading_rows + v * rows_per_var + i * rows_per_cat + 7,
0,
es->minima[0].val,
- 0);
+ NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
heading_rows + v * rows_per_var + i * rows_per_cat + 8,
0,
es->maxima[0].val,
- 0);
+ NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
heading_rows + v * rows_per_var + i * rows_per_cat + 9,
0,
es->maxima[0].val - es->minima[0].val,
- 0);
+ NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
0,
percentile_calculate (es->quartiles[2], cmd->pc_alg) -
percentile_calculate (es->quartiles[0], cmd->pc_alg),
- 0);
+ NULL, RC_OTHER);
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 11,
- 0, m3, 0);
+ 0, m3, NULL, RC_OTHER);
tab_double (t,
1 + iact->n_vars + 3,
heading_rows + v * rows_per_var + i * rows_per_cat + 11,
- 0, calc_seskew (m0), 0);
+ 0, calc_seskew (m0), NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 12,
- 0, m4, 0);
+ 0, m4, NULL, RC_OTHER);
tab_double (t,
1 + iact->n_vars + 3,
heading_rows + v * rows_per_var + i * rows_per_cat + 12,
- 0, calc_sekurt (m0), 0);
+ 0, calc_sekurt (m0), NULL, RC_OTHER);
}
free (prev_val);
const int nc = 2 + heading_columns;
t = tab_create (nc, nr);
+
tab_title (t, _("Extreme Values"));
tab_headers (t, heading_columns, 0, heading_rows, 0);
{
struct string str;
ds_init_empty (&str);
- var_append_value_name (ivar, val, &str);
+ append_value_name (ivar, val, &str);
tab_text (t,
1 + ivar_idx,
heading_rows + v * rows_per_var + i * rows_per_cat + e,
TAB_RIGHT,
e + 1,
- &F_8_0);
+ NULL, RC_INTEGER);
/* The casenumber */
if (cmd->id_var)
heading_rows + v * rows_per_var + i * rows_per_cat + e,
TAB_RIGHT,
es->maxima[e].identity.f,
- &F_8_0);
+ NULL, RC_INTEGER);
tab_double (t,
- heading_columns + 1,
- heading_rows + v * rows_per_var + i * rows_per_cat + e,
- 0,
- es->maxima[e].val,
- 0);
-
-
+ heading_columns + 1,
+ heading_rows + v * rows_per_var + i * rows_per_cat + e,
+ 0,
+ es->maxima[e].val,
+ var_get_print_format (cmd->dep_vars[v]), RC_OTHER);
+
tab_double (t,
heading_columns - 1,
heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
TAB_RIGHT,
e + 1,
- &F_8_0);
+ NULL, RC_INTEGER);
/* The casenumber */
if (cmd->id_var)
heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
TAB_RIGHT,
es->minima[e].identity.f,
- &F_8_0);
+ NULL, RC_INTEGER);
tab_double (t,
heading_columns + 1,
heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
0,
es->minima[e].val,
- 0);
+ var_get_print_format (cmd->dep_vars[v]), RC_OTHER);
}
}
free (prev_val);
const int nc = 6 + heading_columns;
t = tab_create (nc, nr);
+ tab_set_format (t, RC_WEIGHT, wfmt);
tab_title (t, _("Case Processing Summary"));
tab_headers (t, heading_columns, 0, heading_rows, 0);
{
struct string str;
ds_init_empty (&str);
- var_append_value_name (ivar, val, &str);
+ append_value_name (ivar, val, &str);
tab_text (t,
1 + ivar_idx, heading_rows + n_cats * v + i,
heading_rows + n_cats * v + i,
0,
es[v].non_missing,
- wfmt);
+ NULL, RC_WEIGHT);
tab_text_format (t,
heading_rows + n_cats * v + i,
0,
es[v].missing,
- wfmt);
+ NULL, RC_WEIGHT);
tab_text_format (t,
heading_columns + 3,
heading_rows + n_cats * v + i,
0,
total,
- wfmt);
+ NULL, RC_WEIGHT);
/* This can only be 100% can't it? */
tab_text_format (t,
tab_submit (t);
}
-
-/* Match a variable.
- If the match succeeds, the variable will be placed in VAR.
- Returns true if successful */
-static bool
-lex_match_variable (struct lexer *lexer,
- const struct dictionary *dict, const struct variable **var)
-{
- if (lex_token (lexer) != T_ID)
-
- return false;
-
- *var = parse_variable_const (lexer, dict);
-
- if ( *var == NULL)
- return false;
- return true;
-}
-
/* Attempt to parse an interaction from LEXER */
static struct interaction *
parse_interaction (struct lexer *lexer, struct examine *ex)
int v;
const struct examine *examine = aux1;
struct exploratory_stats *es = user_data;
-
+
for (v = 0; v < examine->n_dep_vars; v++)
{
struct ccase *outcase ;
const struct variable *var = examine->dep_vars[v];
const double x = case_data (c, var)->f;
- if (var_is_value_missing (var, case_data (c, var), examine->exclude))
+ if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
{
es[v].missing += weight;
continue;
{
int i;
casenumber imin = 0;
- double imax = es[v].cc;
+ casenumber imax;
struct casereader *reader;
struct ccase *c;
- casenumber total_cases;
- if (examine->histogram)
+ if (examine->histogramplot)
{
/* Sturges Rule */
double bin_width = fabs (es[v].minimum - es[v].maximum)
/ (1 + log2 (es[v].cc))
;
- bin_width = chart_rounded_tick (bin_width);
-
es[v].histogram =
histogram_create (bin_width, es[v].minimum, es[v].maximum);
}
es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
- total_cases = casereader_count_cases (es[v].sorted_reader);
es[v].sorted_writer = NULL;
+ imax = casereader_get_case_cnt (es[v].sorted_reader);
+
es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
for (i = 0; i < examine->calc_extremes; ++i)
(c = casereader_read (reader)) != NULL; case_unref (c))
{
const double val = case_data_idx (c, EX_VAL)->f;
- const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */
+ const double wt = case_data_idx (c, EX_WT)->f;
moments_pass_two (es[v].mom, val, wt);
min->val = val;
value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
}
- imin += wt;
+ imin ++;
}
- imax -= wt;
+ imax --;
if (imax < examine->calc_extremes)
{
int x;
- for (x = imax; x < imax + wt; ++x)
+ for (x = imax; x < imax + 1; ++x)
{
struct extremity *max;
if (examine->calc_extremes > 0)
{
assert (es[v].minima[0].val == es[v].minimum);
- assert (es[v].maxima[0].val == es[v].maximum);
+ assert (es[v].maxima[0].val == es[v].maximum);
}
{
struct payload payload;
payload.create = create_n;
payload.update = update_n;
- payload.destroy = calculate_n;
+ payload.calculate = calculate_n;
+ payload.destroy = NULL;
cmd->wv = dict_get_weight (cmd->dict);
cmd->cats
= categoricals_create (cmd->iacts, cmd->n_iacts,
- cmd->wv, cmd->exclude);
+ cmd->wv, cmd->dep_excl, cmd->fctr_excl);
categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
- if (cmd->id_idx == -1)
+ if (cmd->id_var == NULL)
{
struct ccase *c = casereader_peek (input, 0);
- assert (cmd->id_var == NULL);
-
cmd->id_idx = case_get_value_cnt (c);
input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
case_unref (c);
}
- /* FIXME: Filter out missing factor variables */
-
/* Remove cases on a listwise basis if requested */
if ( cmd->missing_pw == false)
input = casereader_create_filter_missing (input,
cmd->dep_vars,
cmd->n_dep_vars,
- cmd->exclude,
+ cmd->dep_excl,
NULL,
NULL);
}
}
- if (cmd->histogram)
+ if (cmd->histogramplot)
show_histogram (cmd, i);
if (cmd->npplot)
show_npplot (cmd, i);
+ if (cmd->spreadlevelplot)
+ show_spreadlevel (cmd, i);
+
if (cmd->descriptives)
descriptives_report (cmd, i);
}
examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *));
examine.iacts[0] = interaction_create (NULL);
- examine.exclude = MV_ANY;
- examine.histogram = false;
+ examine.dep_excl = MV_ANY;
+ examine.fctr_excl = MV_ANY;
+ examine.histogramplot = false;
examine.npplot = false;
examine.boxplot = false;
+ examine.spreadlevelplot = false;
+ examine.sl_power = 0;
examine.dict = dataset_dict (ds);
}
else if (lex_match_id (lexer, "EXCLUDE"))
{
- examine.exclude = MV_ANY;
+ examine.dep_excl = MV_ANY;
}
else if (lex_match_id (lexer, "INCLUDE"))
{
- examine.exclude = MV_SYSTEM;
+ examine.dep_excl = MV_SYSTEM;
+ }
+ else if (lex_match_id (lexer, "REPORT"))
+ {
+ examine.fctr_excl = MV_NEVER;
+ }
+ else if (lex_match_id (lexer, "NOREPORT"))
+ {
+ examine.fctr_excl = MV_ANY;
}
else
{
}
else if (lex_match_id (lexer, "HISTOGRAM"))
{
- examine.histogram = true;
+ examine.histogramplot = true;
+ }
+ else if (lex_match_id (lexer, "SPREADLEVEL"))
+ {
+ examine.spreadlevelplot = true;
+ examine.sl_power = 0;
+ if (lex_match (lexer, T_LPAREN))
+ {
+ examine.sl_power = lex_integer (lexer);
+
+ lex_get (lexer);
+ if (! lex_force_match (lexer, T_RPAREN))
+ goto error;
+ }
}
else if (lex_match_id (lexer, "NONE"))
{
- examine.histogram = false;
+ examine.histogramplot = false;
examine.npplot = false;
examine.boxplot = false;
}
else if (lex_match (lexer, T_ALL))
{
- examine.histogram = true;
+ examine.histogramplot = true;
examine.npplot = true;
examine.boxplot = true;
}
{
examine.n_iacts--;
examine.iacts = &iacts_mem[1];
+ interaction_destroy (iacts_mem[0]);
}
for (i = 0; i < examine.n_iacts; ++i)
interaction_destroy (examine.iacts[i]);
-
free (examine.ptiles);
free (examine.dep_vars);
pool_destroy (examine.pool);
error:
caseproto_unref (examine.ex_proto);
+ examine.iacts = iacts_mem;
for (i = 0; i < examine.n_iacts; ++i)
interaction_destroy (examine.iacts[i]);
free (examine.dep_vars);