/*
PSPP - a program for statistical analysis.
- Copyright (C) 2012 Free Software Foundation, Inc.
+ Copyright (C) 2012, 2013, 2016 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
+/* Hands VAR, VAL and STR to var_append_value_name, then adds the
+   translatable suffix " (missing)" to STR when var_is_value_missing
+   reports VAL as missing for VAR under class MV_ANY. */
+static void
+append_value_name (const struct variable *var, const union value *val, struct string *str)
+{
+  var_append_value_name (var, val, str);
+  /* Nothing more to add unless the value counts as missing. */
+  if (!var_is_value_missing (var, val, MV_ANY))
+    return;
+  ds_put_cstr (str, _(" (missing)"));
+}
+
enum bp_mode
{
BP_GROUPS,
size_t n_iacts;
struct interaction **iacts;
- enum mv_class exclude;
+ enum mv_class dep_excl;
+ enum mv_class fctr_excl;
const struct dictionary *dict;
size_t n_percentiles;
bool npplot;
- bool histogram;
+ bool histogramplot;
bool boxplot;
- bool spreadlevel;
+ bool spreadlevelplot;
int sl_power;
enum bp_mode boxplot_mode;
const struct ccase *c =
categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
- const struct exploratory_stats *es =
+ struct exploratory_stats *es =
categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
ds_init_empty (&label);
for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
{
+ struct string l;
const struct variable *ivar = iact->vars[ivar_idx];
const union value *val = case_data (c, ivar);
-
- ds_put_cstr (&label, var_to_string (ivar));
- ds_put_cstr (&label, " = ");
- var_append_value_name (ivar, val, &label);
- ds_put_cstr (&label, "; ");
+ ds_init_empty (&l);
+
+ append_value_name (ivar, val, &l);
+ ds_ltrim (&l, ss_cstr (" "));
+
+ ds_put_substring (&label, l.ss);
+ if (ivar_idx < iact->n_vars - 1)
+ ds_put_cstr (&label, "; ");
+
+ ds_destroy (&l);
}
boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label));
+ es[v].box_whisker = NULL;
ds_destroy (&label);
}
ds_put_cstr (&label, var_to_string (ivar));
ds_put_cstr (&label, " = ");
- var_append_value_name (ivar, val, &label);
+ append_value_name (ivar, val, &label);
ds_put_cstr (&label, "; ");
}
for (v = 0; v < cmd->n_dep_vars; ++v)
{
- const struct exploratory_stats *es =
+ struct exploratory_stats *es =
categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
boxplot_add_box (boxplot, es[v].box_whisker,
var_to_string (cmd->dep_vars[v]));
+ es[v].box_whisker = NULL;
}
boxplot_submit (boxplot);
ds_put_cstr (&label, var_to_string (ivar));
ds_put_cstr (&label, " = ");
- var_append_value_name (ivar, val, &label);
+ append_value_name (ivar, val, &label);
ds_put_cstr (&label, "; ");
}
ds_put_cstr (&label, var_to_string (ivar));
ds_put_cstr (&label, " = ");
- var_append_value_name (ivar, val, &label);
+ append_value_name (ivar, val, &label);
ds_put_cstr (&label, "; ");
}
const int nc = heading_columns + cmd->n_percentiles;
t = tab_create (nc, nr);
+
tab_title (t, _("Percentiles"));
tab_headers (t, heading_columns, 0, heading_rows, 0);
{
struct string str;
ds_init_empty (&str);
- var_append_value_name (ivar, val, &str);
+ append_value_name (ivar, val, &str);
tab_text (t,
1 + ivar_idx,
heading_rows + v * rows_per_var + i * rows_per_cat,
0,
percentile_calculate (es->percentiles[p], cmd->pc_alg),
- 0);
+ NULL, RC_OTHER);
if (cmd->ptiles[p] == 25.0)
{
heading_rows + v * rows_per_var + i * rows_per_cat + 1,
0,
hinges[0],
- 0);
+ NULL, RC_OTHER);
}
else if (cmd->ptiles[p] == 50.0)
{
heading_rows + v * rows_per_var + i * rows_per_cat + 1,
0,
hinges[1],
- 0);
+ NULL, RC_OTHER);
}
else if (cmd->ptiles[p] == 75.0)
{
heading_rows + v * rows_per_var + i * rows_per_cat + 1,
0,
hinges[2],
- 0);
+ NULL, RC_OTHER);
}
}
const int nc = 2 + heading_columns;
t = tab_create (nc, nr);
+
tab_title (t, _("Descriptives"));
tab_headers (t, heading_columns, 0, heading_rows, 0);
{
struct string str;
ds_init_empty (&str);
- var_append_value_name (ivar, val, &str);
+ append_value_name (ivar, val, &str);
tab_text (t,
1 + ivar_idx,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat,
- 0, m1, 0);
+ 0, m1, NULL, RC_OTHER);
tab_double (t,
1 + iact->n_vars + 3,
heading_rows + v * rows_per_var + i * rows_per_cat,
- 0, calc_semean (m2, m0), 0);
+ 0, calc_semean (m2, m0), NULL, RC_OTHER);
tab_text_format (t,
1 + iact->n_vars,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 1,
- 0, m1 - tval * calc_semean (m2, m0), 0);
+ 0, m1 - tval * calc_semean (m2, m0), NULL, RC_OTHER);
tab_text (t,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 2,
- 0, m1 + tval * calc_semean (m2, m0), 0);
+ 0, m1 + tval * calc_semean (m2, m0), NULL, RC_OTHER);
tab_text (t,
heading_rows + v * rows_per_var + i * rows_per_cat + 3,
0,
trimmed_mean_calculate (es->trimmed_mean),
- 0);
+ NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
heading_rows + v * rows_per_var + i * rows_per_cat + 4,
0,
percentile_calculate (es->quartiles[1], cmd->pc_alg),
- 0);
+ NULL, RC_OTHER);
tab_text (t,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 5,
- 0, m2, 0);
+ 0, m2, NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 6,
- 0, sqrt (m2), 0);
+ 0, sqrt (m2), NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
heading_rows + v * rows_per_var + i * rows_per_cat + 7,
0,
es->minima[0].val,
- 0);
+ NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
heading_rows + v * rows_per_var + i * rows_per_cat + 8,
0,
es->maxima[0].val,
- 0);
+ NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
heading_rows + v * rows_per_var + i * rows_per_cat + 9,
0,
es->maxima[0].val - es->minima[0].val,
- 0);
+ NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
0,
percentile_calculate (es->quartiles[2], cmd->pc_alg) -
percentile_calculate (es->quartiles[0], cmd->pc_alg),
- 0);
+ NULL, RC_OTHER);
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 11,
- 0, m3, 0);
+ 0, m3, NULL, RC_OTHER);
tab_double (t,
1 + iact->n_vars + 3,
heading_rows + v * rows_per_var + i * rows_per_cat + 11,
- 0, calc_seskew (m0), 0);
+ 0, calc_seskew (m0), NULL, RC_OTHER);
tab_text (t,
1 + iact->n_vars,
tab_double (t,
1 + iact->n_vars + 2,
heading_rows + v * rows_per_var + i * rows_per_cat + 12,
- 0, m4, 0);
+ 0, m4, NULL, RC_OTHER);
tab_double (t,
1 + iact->n_vars + 3,
heading_rows + v * rows_per_var + i * rows_per_cat + 12,
- 0, calc_sekurt (m0), 0);
+ 0, calc_sekurt (m0), NULL, RC_OTHER);
}
free (prev_val);
const int nc = 2 + heading_columns;
t = tab_create (nc, nr);
+
tab_title (t, _("Extreme Values"));
tab_headers (t, heading_columns, 0, heading_rows, 0);
{
struct string str;
ds_init_empty (&str);
- var_append_value_name (ivar, val, &str);
+ append_value_name (ivar, val, &str);
tab_text (t,
1 + ivar_idx,
heading_rows + v * rows_per_var + i * rows_per_cat + e,
TAB_RIGHT,
e + 1,
- &F_8_0);
+ NULL, RC_INTEGER);
/* The casenumber */
if (cmd->id_var)
heading_rows + v * rows_per_var + i * rows_per_cat + e,
TAB_RIGHT,
es->maxima[e].identity.f,
- &F_8_0);
+ NULL, RC_INTEGER);
tab_double (t,
heading_columns + 1,
heading_rows + v * rows_per_var + i * rows_per_cat + e,
0,
es->maxima[e].val,
- var_get_print_format (cmd->dep_vars[v]));
+ var_get_print_format (cmd->dep_vars[v]), RC_OTHER);
tab_double (t,
heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
TAB_RIGHT,
e + 1,
- &F_8_0);
+ NULL, RC_INTEGER);
/* The casenumber */
if (cmd->id_var)
heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
TAB_RIGHT,
es->minima[e].identity.f,
- &F_8_0);
+ NULL, RC_INTEGER);
tab_double (t,
heading_columns + 1,
heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
0,
es->minima[e].val,
- var_get_print_format (cmd->dep_vars[v]));
+ var_get_print_format (cmd->dep_vars[v]), RC_OTHER);
}
}
free (prev_val);
const int nc = 6 + heading_columns;
t = tab_create (nc, nr);
+ tab_set_format (t, RC_WEIGHT, wfmt);
tab_title (t, _("Case Processing Summary"));
tab_headers (t, heading_columns, 0, heading_rows, 0);
{
struct string str;
ds_init_empty (&str);
- var_append_value_name (ivar, val, &str);
+ append_value_name (ivar, val, &str);
tab_text (t,
1 + ivar_idx, heading_rows + n_cats * v + i,
heading_rows + n_cats * v + i,
0,
es[v].non_missing,
- wfmt);
+ NULL, RC_WEIGHT);
tab_text_format (t,
heading_rows + n_cats * v + i,
0,
es[v].missing,
- wfmt);
+ NULL, RC_WEIGHT);
tab_text_format (t,
heading_columns + 3,
heading_rows + n_cats * v + i,
0,
total,
- wfmt);
+ NULL, RC_WEIGHT);
/* This can only be 100% can't it? */
tab_text_format (t,
tab_submit (t);
}
-
-/* Match a variable.
- If the match succeeds, the variable will be placed in VAR.
- Returns true if successful */
-static bool
-lex_match_variable (struct lexer *lexer,
- const struct dictionary *dict, const struct variable **var)
-{
- if (lex_token (lexer) != T_ID)
-
- return false;
-
- *var = parse_variable_const (lexer, dict);
-
- if ( *var == NULL)
- return false;
- return true;
-}
-
/* Attempt to parse an interaction from LEXER */
static struct interaction *
parse_interaction (struct lexer *lexer, struct examine *ex)
const struct examine *examine = aux1;
struct exploratory_stats *es = user_data;
+ bool this_case_is_missing = false;
+ /* LISTWISE missing must be dealt with here */
+ if (!examine->missing_pw)
+ {
+ for (v = 0; v < examine->n_dep_vars; v++)
+ {
+ const struct variable *var = examine->dep_vars[v];
+
+ if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
+ {
+ es[v].missing += weight;
+ this_case_is_missing = true;
+ }
+ }
+ }
+
+ if (this_case_is_missing)
+ return;
+
for (v = 0; v < examine->n_dep_vars; v++)
{
struct ccase *outcase ;
const struct variable *var = examine->dep_vars[v];
const double x = case_data (c, var)->f;
- if (var_is_value_missing (var, case_data (c, var), examine->exclude))
+ if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
{
es[v].missing += weight;
continue;
{
int i;
casenumber imin = 0;
- double imax = es[v].cc;
+ casenumber imax;
struct casereader *reader;
struct ccase *c;
- casenumber total_cases;
- if (examine->histogram)
+ if (examine->histogramplot && es[v].non_missing > 0)
{
/* Sturges Rule */
double bin_width = fabs (es[v].minimum - es[v].maximum)
/ (1 + log2 (es[v].cc))
;
- bin_width = chart_rounded_tick (bin_width);
-
es[v].histogram =
histogram_create (bin_width, es[v].minimum, es[v].maximum);
}
es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
- total_cases = casereader_count_cases (es[v].sorted_reader);
es[v].sorted_writer = NULL;
+ imax = casereader_get_case_cnt (es[v].sorted_reader);
+
es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
for (i = 0; i < examine->calc_extremes; ++i)
value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ;
value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ;
}
-
+
+ bool warn = true;
for (reader = casereader_clone (es[v].sorted_reader);
(c = casereader_read (reader)) != NULL; case_unref (c))
{
const double val = case_data_idx (c, EX_VAL)->f;
- const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */
-
+ double wt = case_data_idx (c, EX_WT)->f;
+ wt = var_force_valid_weight (examine->wv, wt, &warn);
+
moments_pass_two (es[v].mom, val, wt);
if (es[v].histogram)
min->val = val;
value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
}
- imin += wt;
+ imin ++;
}
- imax -= wt;
+ imax --;
if (imax < examine->calc_extremes)
{
int x;
- for (x = imax; x < imax + wt; ++x)
+ for (x = imax; x < imax + 1; ++x)
{
struct extremity *max;
}
casereader_destroy (reader);
- if (examine->calc_extremes > 0)
+ if (examine->calc_extremes > 0 && es[v].non_missing > 0)
{
assert (es[v].minima[0].val == es[v].minimum);
- assert (es[v].maxima[0].val == es[v].maximum);
+ assert (es[v].maxima[0].val == es[v].maximum);
}
{
statistic_destroy (&es[v].histogram->parent);
moments_destroy (es[v].mom);
+ if (es[v].box_whisker)
+ {
+ stat = &es[v].box_whisker->parent.parent;
+ stat->destroy (stat);
+ }
+
casereader_destroy (es[v].sorted_reader);
}
}
struct payload payload;
payload.create = create_n;
payload.update = update_n;
- payload.destroy = calculate_n;
+ payload.calculate = calculate_n;
+ payload.destroy = NULL;
cmd->wv = dict_get_weight (cmd->dict);
cmd->cats
= categoricals_create (cmd->iacts, cmd->n_iacts,
- cmd->wv, cmd->exclude);
+ cmd->wv, cmd->dep_excl, cmd->fctr_excl);
categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
- if (cmd->id_idx == -1)
+ if (cmd->id_var == NULL)
{
struct ccase *c = casereader_peek (input, 0);
- assert (cmd->id_var == NULL);
-
cmd->id_idx = case_get_value_cnt (c);
input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
case_unref (c);
}
- /* FIXME: Filter out missing factor variables */
-
- /* Remove cases on a listwise basis if requested */
- if ( cmd->missing_pw == false)
- input = casereader_create_filter_missing (input,
- cmd->dep_vars,
- cmd->n_dep_vars,
- cmd->exclude,
- NULL,
- NULL);
-
for (reader = input;
(c = casereader_read (reader)) != NULL; case_unref (c))
{
{
summary_report (cmd, i);
+ const size_t n_cats = categoricals_n_count (cmd->cats, i);
+ if (n_cats == 0)
+ continue;
+
if (cmd->disp_extremes > 0)
extremes_report (cmd, i);
}
}
- if (cmd->histogram)
+ if (cmd->histogramplot)
show_histogram (cmd, i);
if (cmd->npplot)
show_npplot (cmd, i);
- if (cmd->spreadlevel)
+ if (cmd->spreadlevelplot)
show_spreadlevel (cmd, i);
if (cmd->descriptives)
examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *));
examine.iacts[0] = interaction_create (NULL);
- examine.exclude = MV_ANY;
- examine.histogram = false;
+ examine.dep_excl = MV_ANY;
+ examine.fctr_excl = MV_ANY;
+ examine.histogramplot = false;
examine.npplot = false;
examine.boxplot = false;
- examine.spreadlevel = false;
+ examine.spreadlevelplot = false;
examine.sl_power = 0;
-
+ examine.dep_vars = NULL;
+ examine.n_dep_vars = 0;
examine.dict = dataset_dict (ds);
/* Accept an optional, completely pointless "/VARIABLES=" */
int extr = 5;
if (lex_match (lexer, T_LPAREN))
{
+ if (!lex_force_int (lexer))
+ goto error;
extr = lex_integer (lexer);
if (extr < 0)
}
else if (lex_match_id (lexer, "EXCLUDE"))
{
- examine.exclude = MV_ANY;
+ examine.dep_excl = MV_ANY;
}
else if (lex_match_id (lexer, "INCLUDE"))
{
- examine.exclude = MV_SYSTEM;
+ examine.dep_excl = MV_SYSTEM;
+ }
+ else if (lex_match_id (lexer, "REPORT"))
+ {
+ examine.fctr_excl = MV_NEVER;
+ }
+ else if (lex_match_id (lexer, "NOREPORT"))
+ {
+ examine.fctr_excl = MV_ANY;
}
else
{
}
else if (lex_match_id (lexer, "HISTOGRAM"))
{
- examine.histogram = true;
+ examine.histogramplot = true;
}
else if (lex_match_id (lexer, "SPREADLEVEL"))
{
- examine.spreadlevel = true;
+ examine.spreadlevelplot = true;
examine.sl_power = 0;
- if (lex_match (lexer, T_LPAREN))
+ if (lex_match (lexer, T_LPAREN) && lex_force_int (lexer))
{
examine.sl_power = lex_integer (lexer);
}
else if (lex_match_id (lexer, "NONE"))
{
- examine.histogram = false;
+ examine.histogramplot = false;
examine.npplot = false;
examine.boxplot = false;
}
else if (lex_match (lexer, T_ALL))
{
- examine.histogram = true;
+ examine.histogramplot = true;
examine.npplot = true;
examine.boxplot = true;
}
{
examine.n_iacts--;
examine.iacts = &iacts_mem[1];
+ interaction_destroy (iacts_mem[0]);
}
for (i = 0; i < examine.n_iacts; ++i)
interaction_destroy (examine.iacts[i]);
-
free (examine.ptiles);
free (examine.dep_vars);
pool_destroy (examine.pool);