X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flanguage%2Fstats%2Fexamine.c;h=4dbafce320ba41d0e1d892034b310ccd22626bb7;hb=4783127cc513fa1bd3aa05f560c4030fa334d669;hp=4f47a6c95a38e95fdaa01785f95f2f51a07aab7d;hpb=5cab4cf3322f29c0ed7134d23740e07382914f20;p=pspp diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 4f47a6c95a..4dbafce320 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012, 2013, 2016 Free Software Foundation, Inc. + Copyright (C) 2012, 2013, 2016, 2019 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -47,6 +47,7 @@ #include "math/sort.h" #include "math/order-stats.h" #include "math/percentiles.h" +#include "math/shapiro-wilk.h" #include "math/tukey-hinges.h" #include "math/trimmed-mean.h" @@ -70,7 +71,7 @@ static void append_value_name (const struct variable *var, const union value *val, struct string *str) { var_append_value_name (var, val, str); - if ( var_is_value_missing (var, val, MV_ANY)) + if (var_is_value_missing (var, val)) ds_put_cstr (str, _(" (missing)")); } @@ -90,6 +91,11 @@ enum }; +#define PLOT_HISTOGRAM 0x1 +#define PLOT_BOXPLOT 0x2 +#define PLOT_NPPLOT 0x4 +#define PLOT_SPREADLEVEL 0x8 + struct examine { struct pool *pool; @@ -128,11 +134,8 @@ struct examine double *ptiles; size_t n_percentiles; - bool npplot; - bool histogramplot; - bool boxplot; - bool spreadlevelplot; - int sl_power; + unsigned int plot; + float sl_power; enum bp_mode boxplot_mode; @@ -178,6 +181,7 @@ struct exploratory_stats struct trimmed_mean *trimmed_mean; struct percentile *quartiles[3]; struct percentile **percentiles; + struct shapiro_wilk *shapiro_wilk; struct tukey_hinges *hinges; @@ -231,10 +235,10 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) const struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); - if ( y_min > es[v].minimum) + if (y_min > es[v].minimum) y_min = es[v].minimum; - if ( y_max < es[v].maximum) + if (y_max < es[v].maximum) y_max = es[v].maximum; } @@ -306,14 +310,14 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) const struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); - if ( y_min > es[v].minimum) + if (y_min > es[v].minimum) y_min = es[v].minimum; - if ( y_max < es[v].maximum) + if (y_max < es[v].maximum) y_max = es[v].maximum; } - if ( iact->n_vars == 0) + if (iact->n_vars == 0) ds_put_format (&title, _("Boxplot")); else { @@ -369,7 +373,7 @@ show_npplot (const struct examine *cmd, int iact_idx) int grp; for (grp = 0; grp < n_cats; ++grp) { - struct chart_item *npp, *dnpp; + struct chart *npp, *dnpp; struct casereader *reader; struct np *np; @@ -385,7 +389,7 @@ show_npplot (const struct examine *cmd, int iact_idx) ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); - if ( iact->n_vars > 0) + if (iact->n_vars > 0) { ds_put_cstr (&label, " ("); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) @@ -412,13 +416,13 @@ show_npplot (const struct examine *cmd, int iact_idx) if (npp == NULL || dnpp == NULL) { msg (MW, _("Not creating NP plot because data set is empty.")); - chart_item_unref (npp); - chart_item_unref (dnpp); + chart_unref (npp); + chart_unref (dnpp); } else { - chart_item_submit (npp); - chart_item_submit (dnpp); + chart_submit (npp); + chart_submit (dnpp); } casereader_destroy (reader); @@ -442,7 +446,7 @@ show_spreadlevel (const struct examine *cmd, int iact_idx) for (v = 0; v < cmd->n_dep_vars; ++v) { int grp; - struct chart_item *sl; + struct chart *sl; struct string label; ds_init_cstr (&label, @@ -473,7 +477,7 @@ show_spreadlevel (const struct examine *cmd, int iact_idx) if (sl == NULL) msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label)); else - chart_item_submit (sl); + chart_submit (sl); ds_destroy (&label); } @@ -510,7 +514,7 @@ show_histogram (const struct examine *cmd, int iact_idx) ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); - if ( iact->n_vars > 0) + if (iact->n_vars > 0) { ds_put_cstr (&label, " ("); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) @@ -530,8 +534,8 @@ show_histogram (const struct examine *cmd, int iact_idx) moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL); - chart_item_submit - ( histogram_chart_create (es[v].histogram->gsl_hist, + chart_submit + (histogram_chart_create (es[v].histogram->gsl_hist, ds_cstr (&label), n, mean, sqrt (var), false)); @@ -547,7 +551,7 @@ new_value_with_missing_footnote (const struct variable *var, struct pivot_footnote *missing_footnote) { struct pivot_value *pv = pivot_value_new_var_value (var, value); - if (var_is_value_missing (var, value, MV_USER)) + if (var_is_value_missing (var, value) == MV_USER) pivot_value_add_footnote (pv, missing_footnote); return pv; } @@ -558,7 +562,7 @@ create_interaction_dimensions (struct pivot_table *table, const struct interaction *iact, struct pivot_footnote *missing_footnote) { - for (size_t i = iact->n_vars; i-- > 0; ) + for (size_t i = iact->n_vars; i-- > 0;) { const struct variable *var = iact->vars[i]; struct pivot_dimension *d = pivot_dimension_create__ ( @@ -585,7 +589,6 @@ static void percentiles_report (const struct examine *cmd, int iact_idx) { struct pivot_table *table = pivot_table_create (N_("Percentiles")); - table->omit_empty = true; struct pivot_dimension *percentiles = pivot_dimension_create ( table, PIVOT_AXIS_COLUMN, N_("Percentiles")); @@ -659,11 +662,75 @@ percentiles_report (const struct examine *cmd, int iact_idx) pivot_table_submit (table); } +static void +normality_report (const struct examine *cmd, int iact_idx) +{ + struct pivot_table *table = pivot_table_create (N_("Tests of Normality")); + + struct pivot_dimension *test = + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Shapiro-Wilk"), + N_("Statistic"), + N_("df"), PIVOT_RC_COUNT, + N_("Sig.")); + + test->root->show_label = true; + + const struct interaction *iact = cmd->iacts[iact_idx]; + struct pivot_footnote *missing_footnote = create_missing_footnote (table); + create_interaction_dimensions (table, cmd->cats, iact, missing_footnote); + + struct pivot_dimension *dep_dim = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Dependent Variables")); + + size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); + + size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + for (size_t v = 0; v < cmd->n_dep_vars; ++v) + { + indexes[table->n_dimensions - 1] = + pivot_category_create_leaf (dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v])); + + for (size_t i = 0; i < n_cats; ++i) + { + indexes[1] = i; + + const struct exploratory_stats *es + = categoricals_get_user_data_by_category_real ( + cmd->cats, iact_idx, i); + + struct shapiro_wilk *sw = es[v].shapiro_wilk; + + if (sw == NULL) + continue; + + double w = shapiro_wilk_calculate (sw); + + int j = 0; + indexes[0] = j; + + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (w)); + + indexes[0] = ++j; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (sw->n)); + + indexes[0] = ++j; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (shapiro_wilk_significance (sw->n, w))); + } + } + + free (indexes); + + pivot_table_submit (table); +} + + static void descriptives_report (const struct examine *cmd, int iact_idx) { struct pivot_table *table = pivot_table_create (N_("Descriptives")); - table->omit_empty = true; pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Aspect"), N_("Statistic"), N_("Std. Error")); @@ -760,7 +827,6 @@ static void extremes_report (const struct examine *cmd, int iact_idx) { struct pivot_table *table = pivot_table_create (N_("Extreme Values")); - table->omit_empty = true; struct pivot_dimension *statistics = pivot_dimension_create ( table, PIVOT_AXIS_COLUMN, N_("Statistics")); @@ -775,7 +841,9 @@ extremes_report (const struct examine *cmd, int iact_idx) for (size_t i = 0; i < cmd->disp_extremes; i++) pivot_category_create_leaf (order->root, pivot_value_new_integer (i + 1)); - pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Extreme"), + pivot_dimension_create (table, PIVOT_AXIS_ROW, + /* TRANSLATORS: This is a noun, not an adjective. */ + N_("Extreme"), N_("Highest"), N_("Lowest")); const struct interaction *iact = cmd->iacts[iact_idx]; @@ -847,7 +915,6 @@ summary_report (const struct examine *cmd, int iact_idx) { struct pivot_table *table = pivot_table_create ( N_("Case Processing Summary")); - table->omit_empty = true; pivot_table_set_weight_var (table, dict_get_weight (cmd->dict)); pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), @@ -924,7 +991,7 @@ parse_interaction (struct lexer *lexer, struct examine *ex) const struct variable *v = NULL; struct interaction *iact = NULL; - if ( lex_match_variable (lexer, ex->dict, &v)) + if (lex_match_variable (lexer, ex->dict, &v)) { iact = interaction_create (v); @@ -986,7 +1053,8 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, { const struct variable *var = examine->dep_vars[v]; - if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) + if (var_is_value_missing (var, case_data (c, var)) + & examine->dep_excl) { es[v].missing += weight; this_case_is_missing = true; @@ -1001,9 +1069,9 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, { struct ccase *outcase ; const struct variable *var = examine->dep_vars[v]; - const double x = case_data (c, var)->f; + const double x = case_num (c, var); - if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) + if (var_is_value_missing (var, case_data (c, var)) & examine->dep_excl) { es[v].missing += weight; continue; @@ -1023,11 +1091,11 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, /* Save the value and the ID to the writer */ assert (examine->id_idx != -1); - case_data_rw_idx (outcase, EX_VAL)->f = x; + *case_num_rw_idx (outcase, EX_VAL) = x; value_copy (case_data_rw_idx (outcase, EX_ID), case_data_idx (c, examine->id_idx), examine->id_width); - case_data_rw_idx (outcase, EX_WT)->f = weight; + *case_num_rw_idx (outcase, EX_WT) = weight; es[v].cc += weight; @@ -1053,7 +1121,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) struct casereader *reader; struct ccase *c; - if (examine->histogramplot && es[v].non_missing > 0) + if (examine->plot & PLOT_HISTOGRAM && es[v].non_missing > 0) { /* Sturges Rule */ double bin_width = fabs (es[v].minimum - es[v].maximum) @@ -1067,7 +1135,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer); es[v].sorted_writer = NULL; - imax = casereader_get_case_cnt (es[v].sorted_reader); + imax = casereader_get_n_cases (es[v].sorted_reader); es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima)); es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima)); @@ -1081,8 +1149,8 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) for (reader = casereader_clone (es[v].sorted_reader); (c = casereader_read (reader)) != NULL; case_unref (c)) { - const double val = case_data_idx (c, EX_VAL)->f; - double wt = case_data_idx (c, EX_WT)->f; + const double val = case_num_idx (c, EX_VAL); + double wt = case_num_idx (c, EX_WT); wt = var_force_valid_weight (examine->wv, wt, &warn); moments_pass_two (es[v].mom, val, wt); @@ -1130,12 +1198,12 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) { const int n_os = 5 + examine->n_percentiles; - struct order_stats **os ; es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles)); es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05); + es[v].shapiro_wilk = NULL; - os = xcalloc (n_os, sizeof *os); + struct order_stats **os = XCALLOC (n_os, struct order_stats *); os[0] = &es[v].trimmed_mean->parent; es[v].quartiles[0] = percentile_create (0.25, es[v].cc); @@ -1162,7 +1230,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) free (os); } - if (examine->boxplot) + if (examine->plot & PLOT_BOXPLOT) { struct order_stats *os; @@ -1175,7 +1243,24 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) EX_WT, EX_VAL); } - if (examine->npplot) + if (examine->plot) + { + double mean; + + moments_calculate (es[v].mom, NULL, &mean, NULL, NULL, NULL); + + es[v].shapiro_wilk = shapiro_wilk_create (es[v].non_missing, mean); + + if (es[v].shapiro_wilk) + { + struct order_stats *os = &es[v].shapiro_wilk->parent; + order_stats_accumulate_idx (&os, 1, + casereader_clone (es[v].sorted_reader), + EX_WT, EX_VAL); + } + } + + if (examine->plot & PLOT_NPPLOT) { double n, mean, var; struct order_stats *os; @@ -1230,6 +1315,12 @@ cleanup_exploratory_stats (struct examine *cmd) stat->destroy (stat); } + if (es[v].shapiro_wilk) + { + stat = &es[v].shapiro_wilk->parent.parent; + stat->destroy (stat); + } + os = &es[v].trimmed_mean->parent; stat = &os->parent; stat->destroy (stat); @@ -1281,7 +1372,7 @@ run_examine (struct examine *cmd, struct casereader *input) { struct ccase *c = casereader_peek (input, 0); - cmd->id_idx = case_get_value_cnt (c); + cmd->id_idx = case_get_n_values (c); input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); case_unref (c); @@ -1309,7 +1400,7 @@ run_examine (struct examine *cmd, struct casereader *input) if (cmd->n_percentiles > 0) percentiles_report (cmd, i); - if (cmd->boxplot) + if (cmd->plot & PLOT_BOXPLOT) { switch (cmd->boxplot_mode) { @@ -1325,17 +1416,20 @@ run_examine (struct examine *cmd, struct casereader *input) } } - if (cmd->histogramplot) + if (cmd->plot & PLOT_HISTOGRAM) show_histogram (cmd, i); - if (cmd->npplot) + if (cmd->plot & PLOT_NPPLOT) show_npplot (cmd, i); - if (cmd->spreadlevelplot) + if (cmd->plot & PLOT_SPREADLEVEL) show_spreadlevel (cmd, i); if (cmd->descriptives) descriptives_report (cmd, i); + + if (cmd->plot) + normality_report (cmd, i); } cleanup_exploratory_stats (cmd); @@ -1382,10 +1476,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.dep_excl = MV_ANY; examine.fctr_excl = MV_ANY; - examine.histogramplot = false; - examine.npplot = false; - examine.boxplot = false; - examine.spreadlevelplot = false; + examine.plot = 0; examine.sl_power = 0; examine.dep_vars = NULL; examine.n_dep_vars = 0; @@ -1395,7 +1486,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "VARIABLES")) { - if (! lex_force_match (lexer, T_EQUALS) ) + if (! lex_force_match (lexer, T_EQUALS)) goto error; } @@ -1445,16 +1536,10 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) int extr = 5; if (lex_match (lexer, T_LPAREN)) { - if (!lex_force_int (lexer)) + if (!lex_force_int_range (lexer, "EXTREME", 0, INT_MAX)) goto error; extr = lex_integer (lexer); - if (extr < 0) - { - msg (MW, _("%s may not be negative. Using default value (%g)."), "EXTREME", 5.0); - extr = 5; - } - lex_get (lexer); if (! lex_force_match (lexer, T_RPAREN)) goto error; @@ -1483,15 +1568,10 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { while (lex_is_number (lexer)) { + if (!lex_force_num_range_open (lexer, "PERCENTILES", 0, 100)) + goto error; double p = lex_number (lexer); - if ( p <= 0 || p >= 100.0) - { - lex_error (lexer, - _("Percentiles must lie in the range (0, 100)")); - goto error; - } - examine.n_percentiles++; examine.ptiles = xrealloc (examine.ptiles, @@ -1576,7 +1656,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "REPORT")) { - examine.fctr_excl = MV_NEVER; + examine.fctr_excl = 0; } else if (lex_match_id (lexer, "NOREPORT")) { @@ -1615,23 +1695,23 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "BOXPLOT")) { - examine.boxplot = true; + examine.plot |= PLOT_BOXPLOT; } else if (lex_match_id (lexer, "NPPLOT")) { - examine.npplot = true; + examine.plot |= PLOT_NPPLOT; } else if (lex_match_id (lexer, "HISTOGRAM")) { - examine.histogramplot = true; + examine.plot |= PLOT_HISTOGRAM; } else if (lex_match_id (lexer, "SPREADLEVEL")) { - examine.spreadlevelplot = true; + examine.plot |= PLOT_SPREADLEVEL; examine.sl_power = 0; - if (lex_match (lexer, T_LPAREN) && lex_force_int (lexer)) + if (lex_match (lexer, T_LPAREN) && lex_force_num (lexer)) { - examine.sl_power = lex_integer (lexer); + examine.sl_power = lex_number (lexer); lex_get (lexer); if (! lex_force_match (lexer, T_RPAREN)) @@ -1640,15 +1720,11 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "NONE")) { - examine.histogramplot = false; - examine.npplot = false; - examine.boxplot = false; + examine.plot = 0; } else if (lex_match (lexer, T_ALL)) { - examine.histogramplot = true; - examine.npplot = true; - examine.boxplot = true; + examine.plot = ~0; } else { @@ -1660,7 +1736,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "CINTERVAL")) { - if ( !lex_force_num (lexer)) + if (!lex_force_num (lexer)) goto error; examine.conf = lex_number (lexer); @@ -1680,15 +1756,15 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } - if ( totals_seen && nototals_seen) + if (totals_seen && nototals_seen) { - msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); + msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL"); goto error; } /* If totals have been requested or if there are no factors in this analysis, then the totals need to be included. */ - if ( !nototals_seen || examine.n_iacts == 1) + if (!nototals_seen || examine.n_iacts == 1) { examine.iacts = &iacts_mem[0]; } @@ -1700,7 +1776,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } - if ( examine.id_var ) + if (examine.id_var) { examine.id_idx = var_get_case_index (examine.id_var); examine.id_width = var_get_width (examine.id_var); @@ -1725,8 +1801,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) if (percentiles_seen && examine.n_percentiles == 0) { examine.n_percentiles = 7; - examine.ptiles = xcalloc (examine.n_percentiles, - sizeof (*examine.ptiles)); + examine.ptiles = xcalloc (examine.n_percentiles, sizeof (*examine.ptiles)); examine.ptiles[0] = 5; examine.ptiles[1] = 10;