X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flanguage%2Fstats%2Fexamine.c;h=dabcdfe9b35e6135f929444535a889f069ad7ac6;hb=09e3c99327ce339effa18218d656659a99e3be92;hp=4f47a6c95a38e95fdaa01785f95f2f51a07aab7d;hpb=5cab4cf3322f29c0ed7134d23740e07382914f20;p=pspp diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 4f47a6c95a..dabcdfe9b3 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012, 2013, 2016 Free Software Foundation, Inc. + Copyright (C) 2012, 2013, 2016, 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,6 +47,7 @@ #include "math/sort.h" #include "math/order-stats.h" #include "math/percentiles.h" +#include "math/shapiro-wilk.h" #include "math/tukey-hinges.h" #include "math/trimmed-mean.h" @@ -90,6 +91,11 @@ enum }; +#define PLOT_HISTOGRAM 0x1 +#define PLOT_BOXPLOT 0x2 +#define PLOT_NPPLOT 0x4 +#define PLOT_SPREADLEVEL 0x8 + struct examine { struct pool *pool; @@ -128,10 +134,7 @@ struct examine double *ptiles; size_t n_percentiles; - bool npplot; - bool histogramplot; - bool boxplot; - bool spreadlevelplot; + unsigned int plot; int sl_power; enum bp_mode boxplot_mode; @@ -178,6 +181,7 @@ struct exploratory_stats struct trimmed_mean *trimmed_mean; struct percentile *quartiles[3]; struct percentile **percentiles; + struct shapiro_wilk *shapiro_wilk; struct tukey_hinges *hinges; @@ -659,6 +663,72 @@ percentiles_report (const struct examine *cmd, int iact_idx) pivot_table_submit (table); } +static void +normality_report (const struct examine *cmd, int iact_idx) +{ + struct pivot_table *table = pivot_table_create (N_("Tests of Normality")); + table->omit_empty = true; + + struct pivot_dimension *test = + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Shapiro-Wilk"), + N_("Statistic"), + N_("df"), PIVOT_RC_COUNT, + N_("Sig.")); + + test->root->show_label = true; + + const struct interaction *iact = cmd->iacts[iact_idx]; + struct pivot_footnote *missing_footnote = create_missing_footnote (table); + create_interaction_dimensions (table, cmd->cats, iact, missing_footnote); + + struct pivot_dimension *dep_dim = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Dependent Variables")); + + size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); + + size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + for (size_t v = 0; v < cmd->n_dep_vars; ++v) + { + indexes[table->n_dimensions - 1] = + pivot_category_create_leaf (dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v])); + + for (size_t i = 0; i < n_cats; ++i) + { + indexes[1] = i; + + const struct exploratory_stats *es + = categoricals_get_user_data_by_category_real ( + cmd->cats, iact_idx, i); + + struct shapiro_wilk *sw = es[v].shapiro_wilk; + + if (sw == NULL) + continue; + + double w = shapiro_wilk_calculate (sw); + + int j = 0; + indexes[0] = j; + + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (w)); + + indexes[0] = ++j; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (sw->n)); + + indexes[0] = ++j; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (shapiro_wilk_significance (sw->n, w))); + } + } + + free (indexes); + + pivot_table_submit (table); +} + + static void descriptives_report (const struct examine *cmd, int iact_idx) { @@ -1053,7 +1123,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) struct casereader *reader; struct ccase *c; - if (examine->histogramplot && es[v].non_missing > 0) + if (examine->plot & PLOT_HISTOGRAM && es[v].non_missing > 0) { /* Sturges Rule */ double bin_width = fabs (es[v].minimum - es[v].maximum) @@ -1134,6 +1204,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles)); es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05); + es[v].shapiro_wilk = NULL; os = xcalloc (n_os, sizeof *os); os[0] = &es[v].trimmed_mean->parent; @@ -1162,7 +1233,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) free (os); } - if (examine->boxplot) + if (examine->plot & PLOT_BOXPLOT) { struct order_stats *os; @@ -1175,7 +1246,24 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) EX_WT, EX_VAL); } - if (examine->npplot) + if (examine->plot) + { + double mean; + + moments_calculate (es[v].mom, NULL, &mean, NULL, NULL, NULL); + + es[v].shapiro_wilk = shapiro_wilk_create (es[v].non_missing, mean); + + if (es[v].shapiro_wilk) + { + struct order_stats *os = &es[v].shapiro_wilk->parent; + order_stats_accumulate_idx (&os, 1, + casereader_clone (es[v].sorted_reader), + EX_WT, EX_VAL); + } + } + + if (examine->plot & PLOT_NPPLOT) { double n, mean, var; struct order_stats *os; @@ -1309,7 +1397,7 @@ run_examine (struct examine *cmd, struct casereader *input) if (cmd->n_percentiles > 0) percentiles_report (cmd, i); - if (cmd->boxplot) + if (cmd->plot & PLOT_BOXPLOT) { switch (cmd->boxplot_mode) { @@ -1325,17 +1413,20 @@ run_examine (struct examine *cmd, struct casereader *input) } } - if (cmd->histogramplot) + if (cmd->plot & PLOT_HISTOGRAM) show_histogram (cmd, i); - if (cmd->npplot) + if (cmd->plot & PLOT_NPPLOT) show_npplot (cmd, i); - if (cmd->spreadlevelplot) + if (cmd->plot & PLOT_SPREADLEVEL) show_spreadlevel (cmd, i); if (cmd->descriptives) descriptives_report (cmd, i); + + if (cmd->plot) + normality_report (cmd, i); } cleanup_exploratory_stats (cmd); @@ -1382,10 +1473,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.dep_excl = MV_ANY; examine.fctr_excl = MV_ANY; - examine.histogramplot = false; - examine.npplot = false; - examine.boxplot = false; - examine.spreadlevelplot = false; + examine.plot = 0; examine.sl_power = 0; examine.dep_vars = NULL; examine.n_dep_vars = 0; @@ -1615,19 +1703,19 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "BOXPLOT")) { - examine.boxplot = true; + examine.plot |= PLOT_BOXPLOT; } else if (lex_match_id (lexer, "NPPLOT")) { - examine.npplot = true; + examine.plot |= PLOT_NPPLOT; } else if (lex_match_id (lexer, "HISTOGRAM")) { - examine.histogramplot = true; + examine.plot |= PLOT_HISTOGRAM; } else if (lex_match_id (lexer, "SPREADLEVEL")) { - examine.spreadlevelplot = true; + examine.plot |= PLOT_SPREADLEVEL; examine.sl_power = 0; if (lex_match (lexer, T_LPAREN) && lex_force_int (lexer)) { @@ -1640,15 +1728,11 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "NONE")) { - examine.histogramplot = false; - examine.npplot = false; - examine.boxplot = false; + examine.plot = 0; } else if (lex_match (lexer, T_ALL)) { - examine.histogramplot = true; - examine.npplot = true; - examine.boxplot = true; + examine.plot = ~0; } else { @@ -1682,7 +1766,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) if ( totals_seen && nototals_seen) { - msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); + msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL"); goto error; }