/*
PSPP - a program for statistical analysis.
- Copyright (C) 2012, 2013, 2016 Free Software Foundation, Inc.
+ Copyright (C) 2012, 2013, 2016, 2019 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "math/sort.h"
#include "math/order-stats.h"
#include "math/percentiles.h"
+#include "math/shapiro-wilk.h"
#include "math/tukey-hinges.h"
#include "math/trimmed-mean.h"
append_value_name (const struct variable *var, const union value *val, struct string *str)
{
var_append_value_name (var, val, str);
- if ( var_is_value_missing (var, val, MV_ANY))
+ if (var_is_value_missing (var, val))
ds_put_cstr (str, _(" (missing)"));
}
};
+#define PLOT_HISTOGRAM 0x1
+#define PLOT_BOXPLOT 0x2
+#define PLOT_NPPLOT 0x4
+#define PLOT_SPREADLEVEL 0x8
+
struct examine
{
struct pool *pool;
double *ptiles;
size_t n_percentiles;
- bool npplot;
- bool histogramplot;
- bool boxplot;
- bool spreadlevelplot;
- int sl_power;
+ unsigned int plot;
+ float sl_power;
enum bp_mode boxplot_mode;
struct trimmed_mean *trimmed_mean;
struct percentile *quartiles[3];
struct percentile **percentiles;
+ struct shapiro_wilk *shapiro_wilk;
struct tukey_hinges *hinges;
const struct exploratory_stats *es =
categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
- if ( y_min > es[v].minimum)
+ if (y_min > es[v].minimum)
y_min = es[v].minimum;
- if ( y_max < es[v].maximum)
+ if (y_max < es[v].maximum)
y_max = es[v].maximum;
}
const struct exploratory_stats *es =
categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
- if ( y_min > es[v].minimum)
+ if (y_min > es[v].minimum)
y_min = es[v].minimum;
- if ( y_max < es[v].maximum)
+ if (y_max < es[v].maximum)
y_max = es[v].maximum;
}
- if ( iact->n_vars == 0)
+ if (iact->n_vars == 0)
ds_put_format (&title, _("Boxplot"));
else
{
int grp;
for (grp = 0; grp < n_cats; ++grp)
{
- struct chart_item *npp, *dnpp;
+ struct chart *npp, *dnpp;
struct casereader *reader;
struct np *np;
ds_init_cstr (&label,
var_to_string (cmd->dep_vars[v]));
- if ( iact->n_vars > 0)
+ if (iact->n_vars > 0)
{
ds_put_cstr (&label, " (");
for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
if (npp == NULL || dnpp == NULL)
{
msg (MW, _("Not creating NP plot because data set is empty."));
- chart_item_unref (npp);
- chart_item_unref (dnpp);
+ chart_unref (npp);
+ chart_unref (dnpp);
}
else
{
- chart_item_submit (npp);
- chart_item_submit (dnpp);
+ chart_submit (npp);
+ chart_submit (dnpp);
}
casereader_destroy (reader);
for (v = 0; v < cmd->n_dep_vars; ++v)
{
int grp;
- struct chart_item *sl;
+ struct chart *sl;
struct string label;
ds_init_cstr (&label,
if (sl == NULL)
msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label));
else
- chart_item_submit (sl);
+ chart_submit (sl);
ds_destroy (&label);
}
ds_init_cstr (&label,
var_to_string (cmd->dep_vars[v]));
- if ( iact->n_vars > 0)
+ if (iact->n_vars > 0)
{
ds_put_cstr (&label, " (");
for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
- chart_item_submit
- ( histogram_chart_create (es[v].histogram->gsl_hist,
+ chart_submit
+ (histogram_chart_create (es[v].histogram->gsl_hist,
ds_cstr (&label), n, mean,
sqrt (var), false));
struct pivot_footnote *missing_footnote)
{
struct pivot_value *pv = pivot_value_new_var_value (var, value);
- if (var_is_value_missing (var, value, MV_USER))
+ if (var_is_value_missing (var, value) == MV_USER)
pivot_value_add_footnote (pv, missing_footnote);
return pv;
}
const struct interaction *iact,
struct pivot_footnote *missing_footnote)
{
- for (size_t i = iact->n_vars; i-- > 0; )
+ for (size_t i = iact->n_vars; i-- > 0;)
{
const struct variable *var = iact->vars[i];
struct pivot_dimension *d = pivot_dimension_create__ (
percentiles_report (const struct examine *cmd, int iact_idx)
{
struct pivot_table *table = pivot_table_create (N_("Percentiles"));
- table->omit_empty = true;
struct pivot_dimension *percentiles = pivot_dimension_create (
table, PIVOT_AXIS_COLUMN, N_("Percentiles"));
pivot_table_submit (table);
}
+static void
+normality_report (const struct examine *cmd, int iact_idx)
+{
+ struct pivot_table *table = pivot_table_create (N_("Tests of Normality"));
+
+ struct pivot_dimension *test =
+ pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Shapiro-Wilk"),
+ N_("Statistic"),
+ N_("df"), PIVOT_RC_COUNT,
+ N_("Sig."));
+
+ test->root->show_label = true;
+
+ const struct interaction *iact = cmd->iacts[iact_idx];
+ struct pivot_footnote *missing_footnote = create_missing_footnote (table);
+ create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
+
+ struct pivot_dimension *dep_dim = pivot_dimension_create (
+ table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
+
+ size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
+
+ size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
+ for (size_t v = 0; v < cmd->n_dep_vars; ++v)
+ {
+ indexes[table->n_dimensions - 1] =
+ pivot_category_create_leaf (dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
+
+ for (size_t i = 0; i < n_cats; ++i)
+ {
+ indexes[1] = i;
+
+ const struct exploratory_stats *es
+ = categoricals_get_user_data_by_category_real (
+ cmd->cats, iact_idx, i);
+
+ struct shapiro_wilk *sw = es[v].shapiro_wilk;
+
+ if (sw == NULL)
+ continue;
+
+ double w = shapiro_wilk_calculate (sw);
+
+ int j = 0;
+ indexes[0] = j;
+
+ pivot_table_put (table, indexes, table->n_dimensions,
+ pivot_value_new_number (w));
+
+ indexes[0] = ++j;
+ pivot_table_put (table, indexes, table->n_dimensions,
+ pivot_value_new_number (sw->n));
+
+ indexes[0] = ++j;
+ pivot_table_put (table, indexes, table->n_dimensions,
+ pivot_value_new_number (shapiro_wilk_significance (sw->n, w)));
+ }
+ }
+
+ free (indexes);
+
+ pivot_table_submit (table);
+}
+
+
static void
descriptives_report (const struct examine *cmd, int iact_idx)
{
struct pivot_table *table = pivot_table_create (N_("Descriptives"));
- table->omit_empty = true;
pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Aspect"),
N_("Statistic"), N_("Std. Error"));
extremes_report (const struct examine *cmd, int iact_idx)
{
struct pivot_table *table = pivot_table_create (N_("Extreme Values"));
- table->omit_empty = true;
struct pivot_dimension *statistics = pivot_dimension_create (
table, PIVOT_AXIS_COLUMN, N_("Statistics"));
for (size_t i = 0; i < cmd->disp_extremes; i++)
pivot_category_create_leaf (order->root, pivot_value_new_integer (i + 1));
- pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Extreme"),
+ pivot_dimension_create (table, PIVOT_AXIS_ROW,
+ /* TRANSLATORS: This is a noun, not an adjective. */
+ N_("Extreme"),
N_("Highest"), N_("Lowest"));
const struct interaction *iact = cmd->iacts[iact_idx];
{
struct pivot_table *table = pivot_table_create (
N_("Case Processing Summary"));
- table->omit_empty = true;
pivot_table_set_weight_var (table, dict_get_weight (cmd->dict));
pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
const struct variable *v = NULL;
struct interaction *iact = NULL;
- if ( lex_match_variable (lexer, ex->dict, &v))
+ if (lex_match_variable (lexer, ex->dict, &v))
{
iact = interaction_create (v);
es[v].minimum = DBL_MAX;
}
- subcase_destroy (&ordering);
+ subcase_uninit (&ordering);
return es;
}
{
const struct variable *var = examine->dep_vars[v];
- if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
+ if (var_is_value_missing (var, case_data (c, var))
+ & examine->dep_excl)
{
es[v].missing += weight;
this_case_is_missing = true;
{
struct ccase *outcase ;
const struct variable *var = examine->dep_vars[v];
- const double x = case_data (c, var)->f;
+ const double x = case_num (c, var);
- if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
+ if (var_is_value_missing (var, case_data (c, var)) & examine->dep_excl)
{
es[v].missing += weight;
continue;
/* Save the value and the ID to the writer */
assert (examine->id_idx != -1);
- case_data_rw_idx (outcase, EX_VAL)->f = x;
+ *case_num_rw_idx (outcase, EX_VAL) = x;
value_copy (case_data_rw_idx (outcase, EX_ID),
case_data_idx (c, examine->id_idx), examine->id_width);
- case_data_rw_idx (outcase, EX_WT)->f = weight;
+ *case_num_rw_idx (outcase, EX_WT) = weight;
es[v].cc += weight;
struct casereader *reader;
struct ccase *c;
- if (examine->histogramplot && es[v].non_missing > 0)
+ if (examine->plot & PLOT_HISTOGRAM && es[v].non_missing > 0)
{
/* Sturges Rule */
double bin_width = fabs (es[v].minimum - es[v].maximum)
es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
es[v].sorted_writer = NULL;
- imax = casereader_get_case_cnt (es[v].sorted_reader);
+ imax = casereader_get_n_cases (es[v].sorted_reader);
es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
for (reader = casereader_clone (es[v].sorted_reader);
(c = casereader_read (reader)) != NULL; case_unref (c))
{
- const double val = case_data_idx (c, EX_VAL)->f;
- double wt = case_data_idx (c, EX_WT)->f;
+ const double val = case_num_idx (c, EX_VAL);
+ double wt = case_num_idx (c, EX_WT);
wt = var_force_valid_weight (examine->wv, wt, &warn);
moments_pass_two (es[v].mom, val, wt);
{
const int n_os = 5 + examine->n_percentiles;
- struct order_stats **os ;
es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles));
es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05);
+ es[v].shapiro_wilk = NULL;
- os = xcalloc (n_os, sizeof *os);
+ struct order_stats **os = XCALLOC (n_os, struct order_stats *);
os[0] = &es[v].trimmed_mean->parent;
es[v].quartiles[0] = percentile_create (0.25, es[v].cc);
free (os);
}
- if (examine->boxplot)
+ if (examine->plot & PLOT_BOXPLOT)
{
struct order_stats *os;
EX_WT, EX_VAL);
}
- if (examine->npplot)
+ if (examine->plot)
+ {
+ double mean;
+
+ moments_calculate (es[v].mom, NULL, &mean, NULL, NULL, NULL);
+
+ es[v].shapiro_wilk = shapiro_wilk_create (es[v].non_missing, mean);
+
+ if (es[v].shapiro_wilk)
+ {
+ struct order_stats *os = &es[v].shapiro_wilk->parent;
+ order_stats_accumulate_idx (&os, 1,
+ casereader_clone (es[v].sorted_reader),
+ EX_WT, EX_VAL);
+ }
+ }
+
+ if (examine->plot & PLOT_NPPLOT)
{
double n, mean, var;
struct order_stats *os;
stat->destroy (stat);
}
+ if (es[v].shapiro_wilk)
+ {
+ stat = &es[v].shapiro_wilk->parent.parent;
+ stat->destroy (stat);
+ }
+
os = &es[v].trimmed_mean->parent;
stat = &os->parent;
stat->destroy (stat);
{
struct ccase *c = casereader_peek (input, 0);
- cmd->id_idx = case_get_value_cnt (c);
+ cmd->id_idx = case_get_n_values (c);
input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
case_unref (c);
if (cmd->n_percentiles > 0)
percentiles_report (cmd, i);
- if (cmd->boxplot)
+ if (cmd->plot & PLOT_BOXPLOT)
{
switch (cmd->boxplot_mode)
{
}
}
- if (cmd->histogramplot)
+ if (cmd->plot & PLOT_HISTOGRAM)
show_histogram (cmd, i);
- if (cmd->npplot)
+ if (cmd->plot & PLOT_NPPLOT)
show_npplot (cmd, i);
- if (cmd->spreadlevelplot)
+ if (cmd->plot & PLOT_SPREADLEVEL)
show_spreadlevel (cmd, i);
if (cmd->descriptives)
descriptives_report (cmd, i);
+
+ if (cmd->plot)
+ normality_report (cmd, i);
}
cleanup_exploratory_stats (cmd);
examine.dep_excl = MV_ANY;
examine.fctr_excl = MV_ANY;
- examine.histogramplot = false;
- examine.npplot = false;
- examine.boxplot = false;
- examine.spreadlevelplot = false;
+ examine.plot = 0;
examine.sl_power = 0;
examine.dep_vars = NULL;
examine.n_dep_vars = 0;
lex_match (lexer, T_SLASH);
if (lex_match_id (lexer, "VARIABLES"))
{
- if (! lex_force_match (lexer, T_EQUALS) )
+ if (! lex_force_match (lexer, T_EQUALS))
goto error;
}
int extr = 5;
if (lex_match (lexer, T_LPAREN))
{
- if (!lex_force_int (lexer))
+ if (!lex_force_int_range (lexer, "EXTREME", 0, INT_MAX))
goto error;
extr = lex_integer (lexer);
- if (extr < 0)
- {
- msg (MW, _("%s may not be negative. Using default value (%g)."), "EXTREME", 5.0);
- extr = 5;
- }
-
lex_get (lexer);
if (! lex_force_match (lexer, T_RPAREN))
goto error;
{
while (lex_is_number (lexer))
{
+ if (!lex_force_num_range_open (lexer, "PERCENTILES", 0, 100))
+ goto error;
double p = lex_number (lexer);
- if ( p <= 0 || p >= 100.0)
- {
- lex_error (lexer,
- _("Percentiles must lie in the range (0, 100)"));
- goto error;
- }
-
examine.n_percentiles++;
examine.ptiles =
xrealloc (examine.ptiles,
}
else if (lex_match_id (lexer, "REPORT"))
{
- examine.fctr_excl = MV_NEVER;
+ examine.fctr_excl = 0;
}
else if (lex_match_id (lexer, "NOREPORT"))
{
{
if (lex_match_id (lexer, "BOXPLOT"))
{
- examine.boxplot = true;
+ examine.plot |= PLOT_BOXPLOT;
}
else if (lex_match_id (lexer, "NPPLOT"))
{
- examine.npplot = true;
+ examine.plot |= PLOT_NPPLOT;
}
else if (lex_match_id (lexer, "HISTOGRAM"))
{
- examine.histogramplot = true;
+ examine.plot |= PLOT_HISTOGRAM;
}
else if (lex_match_id (lexer, "SPREADLEVEL"))
{
- examine.spreadlevelplot = true;
+ examine.plot |= PLOT_SPREADLEVEL;
examine.sl_power = 0;
- if (lex_match (lexer, T_LPAREN) && lex_force_int (lexer))
+ if (lex_match (lexer, T_LPAREN) && lex_force_num (lexer))
{
- examine.sl_power = lex_integer (lexer);
+ examine.sl_power = lex_number (lexer);
lex_get (lexer);
if (! lex_force_match (lexer, T_RPAREN))
}
else if (lex_match_id (lexer, "NONE"))
{
- examine.histogramplot = false;
- examine.npplot = false;
- examine.boxplot = false;
+ examine.plot = 0;
}
else if (lex_match (lexer, T_ALL))
{
- examine.histogramplot = true;
- examine.npplot = true;
- examine.boxplot = true;
+ examine.plot = ~0;
}
else
{
}
else if (lex_match_id (lexer, "CINTERVAL"))
{
- if ( !lex_force_num (lexer))
+ if (!lex_force_num (lexer))
goto error;
examine.conf = lex_number (lexer);
}
- if ( totals_seen && nototals_seen)
+ if (totals_seen && nototals_seen)
{
- msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
+ msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL");
goto error;
}
/* If totals have been requested or if there are no factors
in this analysis, then the totals need to be included. */
- if ( !nototals_seen || examine.n_iacts == 1)
+ if (!nototals_seen || examine.n_iacts == 1)
{
examine.iacts = &iacts_mem[0];
}
}
- if ( examine.id_var )
+ if (examine.id_var)
{
examine.id_idx = var_get_case_index (examine.id_var);
examine.id_width = var_get_width (examine.id_var);
if (percentiles_seen && examine.n_percentiles == 0)
{
examine.n_percentiles = 7;
- examine.ptiles = xcalloc (examine.n_percentiles,
- sizeof (*examine.ptiles));
+ examine.ptiles = xcalloc (examine.n_percentiles, sizeof (*examine.ptiles));
examine.ptiles[0] = 5;
examine.ptiles[1] = 10;