X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.c;h=4dbafce320ba41d0e1d892034b310ccd22626bb7;hb=bcaaaebdde43e26a8d27c53590f34bf29eb56406;hp=9bc287e510fcd36677a62defb77b0dde2ba167d7;hpb=1c817faf0b4f8f7e53d032c805f775e239c6a9f2;p=pspp diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 9bc287e510..4dbafce320 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -1,7 +1,7 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012 Free Software Foundation, Inc. - + Copyright (C) 2012, 2013, 2016, 2019 Free Software Foundation, Inc. + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -11,7 +11,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see . */ @@ -47,6 +47,7 @@ #include "math/sort.h" #include "math/order-stats.h" #include "math/percentiles.h" +#include "math/shapiro-wilk.h" #include "math/tukey-hinges.h" #include "math/trimmed-mean.h" @@ -60,17 +61,17 @@ #include "language/lexer/value-parser.h" #include "language/lexer/variable-parser.h" -#include "output/tab.h" +#include "output/pivot-table.h" #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid -static void +static void append_value_name (const struct variable *var, const union value *val, struct string *str) { var_append_value_name (var, val, str); - if ( var_is_value_missing (var, val, MV_ANY)) + if (var_is_value_missing (var, val)) ds_put_cstr (str, _(" (missing)")); } @@ -90,6 +91,11 @@ enum }; +#define PLOT_HISTOGRAM 0x1 +#define PLOT_BOXPLOT 0x2 +#define PLOT_NPPLOT 0x4 +#define PLOT_SPREADLEVEL 0x8 + struct examine { struct pool *pool; @@ -127,12 +133,9 @@ struct examine enum pc_alg pc_alg; double *ptiles; size_t n_percentiles; - - bool npplot; - bool histogram; - bool boxplot; - bool spreadlevel; - int sl_power; + + unsigned int plot; + float sl_power; enum bp_mode boxplot_mode; @@ -165,7 +168,7 @@ struct exploratory_stats struct extremity *minima; struct extremity *maxima; - /* + /* Minimum should alway equal mimima[0].val. Likewise, maximum should alway equal maxima[0].val. This redundancy exists as an optimisation effort. @@ -178,6 +181,7 @@ struct exploratory_stats struct trimmed_mean *trimmed_mean; struct percentile *quartiles[3]; struct percentile **percentiles; + struct shapiro_wilk *shapiro_wilk; struct tukey_hinges *hinges; @@ -196,54 +200,6 @@ struct exploratory_stats double cmin; }; - -/* Returns an array of (iact->n_vars) pointers to union value initialised to NULL. - The caller must free this array when no longer required. */ -static const union value ** -previous_value_alloc (const struct interaction *iact) -{ - int ivar_idx; - - const union value **prev_val = xcalloc (iact->n_vars, sizeof (*prev_val)); - - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) - prev_val[ivar_idx] = NULL; - - return prev_val; -} - -/* Set the contents of PREV_VAL to the values of C indexed by the variables of IACT */ -static int -previous_value_record (const struct interaction *iact, const struct ccase *c, const union value **prev_val) -{ - int ivar_idx; - int diff_idx = -1; - - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) - { - const struct variable *ivar = iact->vars[ivar_idx]; - const int width = var_get_width (ivar); - const union value *val = case_data (c, ivar); - - if (prev_val[ivar_idx]) - if (! value_equal (prev_val[ivar_idx], val, width)) - { - diff_idx = ivar_idx; - break; - } - } - - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) - { - const struct variable *ivar = iact->vars[ivar_idx]; - const union value *val = case_data (c, ivar); - - prev_val[ivar_idx] = val; - } - return diff_idx; -} - - static void show_boxplot_grouped (const struct examine *cmd, int iact_idx) { @@ -273,19 +229,19 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) } else ds_put_format (&title, _("Boxplot of %s"), var_to_string (cmd->dep_vars[v])); - + for (grp = 0; grp < n_cats; ++grp) { const struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); - if ( y_min > es[v].minimum) + if (y_min > es[v].minimum) y_min = es[v].minimum; - if ( y_max < es[v].maximum) + if (y_max < es[v].maximum) y_max = es[v].maximum; } - + boxplot = boxplot_create (y_min, y_max, ds_cstr (&title)); ds_destroy (&title); @@ -298,26 +254,33 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) const struct ccase *c = categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp); - const struct exploratory_stats *es = + struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); ds_init_empty (&label); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) { + struct string l; const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - - ds_put_cstr (&label, var_to_string (ivar)); - ds_put_cstr (&label, " = "); - append_value_name (ivar, val, &label); - ds_put_cstr (&label, "; "); + ds_init_empty (&l); + + append_value_name (ivar, val, &l); + ds_ltrim (&l, ss_cstr (" ")); + + ds_put_substring (&label, l.ss); + if (ivar_idx < iact->n_vars - 1) + ds_put_cstr (&label, "; "); + + ds_destroy (&l); } boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label)); + es[v].box_whisker = NULL; ds_destroy (&label); } - + boxplot_submit (boxplot); } } @@ -347,14 +310,14 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) const struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); - if ( y_min > es[v].minimum) + if (y_min > es[v].minimum) y_min = es[v].minimum; - if ( y_max < es[v].maximum) + if (y_max < es[v].maximum) y_max = es[v].maximum; } - if ( iact->n_vars == 0) + if (iact->n_vars == 0) ds_put_format (&title, _("Boxplot")); else { @@ -365,7 +328,7 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) { const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - + ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); append_value_name (ivar, val, &label); @@ -384,11 +347,12 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) for (v = 0; v < cmd->n_dep_vars; ++v) { - const struct exploratory_stats *es = + struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); - boxplot_add_box (boxplot, es[v].box_whisker, + boxplot_add_box (boxplot, es[v].box_whisker, var_to_string (cmd->dep_vars[v])); + es[v].box_whisker = NULL; } boxplot_submit (boxplot); @@ -409,7 +373,7 @@ show_npplot (const struct examine *cmd, int iact_idx) int grp; for (grp = 0; grp < n_cats; ++grp) { - struct chart_item *npp, *dnpp; + struct chart *npp, *dnpp; struct casereader *reader; struct np *np; @@ -422,26 +386,26 @@ show_npplot (const struct examine *cmd, int iact_idx) categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); struct string label; - ds_init_cstr (&label, + ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); - if ( iact->n_vars > 0) + if (iact->n_vars > 0) { ds_put_cstr (&label, " ("); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) { const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - + ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); - + } ds_put_cstr (&label, ")"); } - + np = es[v].np; reader = casewriter_make_reader (np->writer); np->writer = NULL; @@ -452,13 +416,13 @@ show_npplot (const struct examine *cmd, int iact_idx) if (npp == NULL || dnpp == NULL) { msg (MW, _("Not creating NP plot because data set is empty.")); - chart_item_unref (npp); - chart_item_unref (dnpp); + chart_unref (npp); + chart_unref (dnpp); } else { - chart_item_submit (npp); - chart_item_submit (dnpp); + chart_submit (npp); + chart_submit (dnpp); } casereader_destroy (reader); @@ -482,10 +446,10 @@ show_spreadlevel (const struct examine *cmd, int iact_idx) for (v = 0; v < cmd->n_dep_vars; ++v) { int grp; - struct chart_item *sl; + struct chart *sl; struct string label; - ds_init_cstr (&label, + ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); if (iact->n_vars > 0) @@ -494,7 +458,7 @@ show_spreadlevel (const struct examine *cmd, int iact_idx) interaction_to_string (iact, &label); ds_put_cstr (&label, ")"); } - + sl = spreadlevel_plot_create (ds_cstr (&label), cmd->sl_power); for (grp = 0; grp < n_cats; ++grp) @@ -512,8 +476,8 @@ show_spreadlevel (const struct examine *cmd, int iact_idx) if (sl == NULL) msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label)); - else - chart_item_submit (sl); + else + chart_submit (sl); ds_destroy (&label); } @@ -547,22 +511,22 @@ show_histogram (const struct examine *cmd, int iact_idx) if (es[v].histogram == NULL) continue; - ds_init_cstr (&label, + ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); - if ( iact->n_vars > 0) + if (iact->n_vars > 0) { ds_put_cstr (&label, " ("); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) { const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - + ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); - + } ds_put_cstr (&label, ")"); } @@ -570,914 +534,454 @@ show_histogram (const struct examine *cmd, int iact_idx) moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL); - chart_item_submit - ( histogram_chart_create (es[v].histogram->gsl_hist, + chart_submit + (histogram_chart_create (es[v].histogram->gsl_hist, ds_cstr (&label), n, mean, sqrt (var), false)); - + ds_destroy (&label); } } } +static struct pivot_value * +new_value_with_missing_footnote (const struct variable *var, + const union value *value, + struct pivot_footnote *missing_footnote) +{ + struct pivot_value *pv = pivot_value_new_var_value (var, value); + if (var_is_value_missing (var, value) == MV_USER) + pivot_value_add_footnote (pv, missing_footnote); + return pv; +} + static void -percentiles_report (const struct examine *cmd, int iact_idx) +create_interaction_dimensions (struct pivot_table *table, + const struct categoricals *cats, + const struct interaction *iact, + struct pivot_footnote *missing_footnote) { - const struct interaction *iact = cmd->iacts[iact_idx]; - int i, v; - const int heading_columns = 1 + iact->n_vars + 1; - const int heading_rows = 2; - struct tab_table *t; + for (size_t i = iact->n_vars; i-- > 0;) + { + const struct variable *var = iact->vars[i]; + struct pivot_dimension *d = pivot_dimension_create__ ( + table, PIVOT_AXIS_ROW, pivot_value_new_variable (var)); + d->root->show_label = true; + + size_t n; + union value *values = categoricals_get_var_values (cats, var, &n); + for (size_t j = 0; j < n; j++) + pivot_category_create_leaf ( + d->root, new_value_with_missing_footnote (var, &values[j], + missing_footnote)); + } +} - const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); +static struct pivot_footnote * +create_missing_footnote (struct pivot_table *table) +{ + return pivot_table_create_footnote ( + table, pivot_value_new_text (N_("User-missing value."))); +} - const int rows_per_cat = 2; - const int rows_per_var = n_cats * rows_per_cat; +static void +percentiles_report (const struct examine *cmd, int iact_idx) +{ + struct pivot_table *table = pivot_table_create (N_("Percentiles")); + + struct pivot_dimension *percentiles = pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Percentiles")); + percentiles->root->show_label = true; + for (int i = 0; i < cmd->n_percentiles; ++i) + pivot_category_create_leaf ( + percentiles->root, + pivot_value_new_user_text_nocopy (xasprintf ("%g", cmd->ptiles[i]))); - const int nr = heading_rows + cmd->n_dep_vars * rows_per_var; - const int nc = heading_columns + cmd->n_percentiles; + pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Statistics"), + N_("Weighted Average"), N_("Tukey's Hinges")); - t = tab_create (nc, nr); - tab_title (t, _("Percentiles")); + const struct interaction *iact = cmd->iacts[iact_idx]; + struct pivot_footnote *missing_footnote = create_missing_footnote (table); + create_interaction_dimensions (table, cmd->cats, iact, missing_footnote); - tab_headers (t, heading_columns, 0, heading_rows, 0); + struct pivot_dimension *dep_dim = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Dependent Variables")); - /* Internal Vertical lines */ - tab_box (t, -1, -1, -1, TAL_1, - heading_columns, 0, nc - 1, nr - 1); + size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); - /* External Frame */ - tab_box (t, TAL_2, TAL_2, -1, -1, - 0, 0, nc - 1, nr - 1); + size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + for (size_t v = 0; v < cmd->n_dep_vars; ++v) + { + indexes[table->n_dimensions - 1] = pivot_category_create_leaf ( + dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v])); - tab_hline (t, TAL_2, 0, nc - 1, heading_rows); - tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + for (size_t i = 0; i < n_cats; ++i) + { + for (size_t j = 0; j < iact->n_vars; j++) + { + int idx = categoricals_get_value_index_by_category_real ( + cmd->cats, iact_idx, i, j); + indexes[table->n_dimensions - 2 - j] = idx; + } - tab_joint_text (t, heading_columns, 0, - nc - 1, 0, - TAT_TITLE | TAB_CENTER, - _("Percentiles") - ); + const struct exploratory_stats *ess + = categoricals_get_user_data_by_category_real ( + cmd->cats, iact_idx, i); + const struct exploratory_stats *es = ess + v; - tab_hline (t, TAL_1, heading_columns, nc - 1, 1); + double hinges[3]; + tukey_hinges_calculate (es->hinges, hinges); + for (size_t pc_idx = 0; pc_idx < cmd->n_percentiles; ++pc_idx) + { + indexes[0] = pc_idx; + + indexes[1] = 0; + double value = percentile_calculate (es->percentiles[pc_idx], + cmd->pc_alg); + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (value)); + + double hinge = (cmd->ptiles[pc_idx] == 25.0 ? hinges[0] + : cmd->ptiles[pc_idx] == 50.0 ? hinges[1] + : cmd->ptiles[pc_idx] == 75.0 ? hinges[2] + : SYSMIS); + if (hinge != SYSMIS) + { + indexes[1] = 1; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (hinge)); + } + } + } - for (i = 0; i < cmd->n_percentiles; ++i) - { - tab_text_format (t, heading_columns + i, 1, - TAT_TITLE | TAB_CENTER, - _("%g"), cmd->ptiles[i]); } + free (indexes); - for (i = 0; i < iact->n_vars; ++i) - { - tab_text (t, - 1 + i, 1, - TAT_TITLE, - var_to_string (iact->vars[i]) - ); - } + pivot_table_submit (table); +} +static void +normality_report (const struct examine *cmd, int iact_idx) +{ + struct pivot_table *table = pivot_table_create (N_("Tests of Normality")); + struct pivot_dimension *test = + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Shapiro-Wilk"), + N_("Statistic"), + N_("df"), PIVOT_RC_COUNT, + N_("Sig.")); - if (n_cats > 0) - { - tab_vline (t, TAL_1, heading_columns - 1, heading_rows, nr - 1); + test->root->show_label = true; - for (v = 0; v < cmd->n_dep_vars; ++v) - { - const union value **prev_vals = previous_value_alloc (iact); - - int ivar_idx; - if ( v > 0 ) - tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var); - - tab_text (t, - 0, heading_rows + v * rows_per_var, - TAT_TITLE | TAB_LEFT, - var_to_string (cmd->dep_vars[v]) - ); - - for (i = 0; i < n_cats; ++i) - { - const struct ccase *c = - categoricals_get_case_by_category_real (cmd->cats, - iact_idx, i); + const struct interaction *iact = cmd->iacts[iact_idx]; + struct pivot_footnote *missing_footnote = create_missing_footnote (table); + create_interaction_dimensions (table, cmd->cats, iact, missing_footnote); - const struct exploratory_stats *ess = - categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); + struct pivot_dimension *dep_dim = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Dependent Variables")); - const struct exploratory_stats *es = ess + v; + size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); - int diff_idx = previous_value_record (iact, c, prev_vals); + size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + for (size_t v = 0; v < cmd->n_dep_vars; ++v) + { + indexes[table->n_dimensions - 1] = + pivot_category_create_leaf (dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v])); - double hinges[3]; - int p; + for (size_t i = 0; i < n_cats; ++i) + { + indexes[1] = i; - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) - { - const struct variable *ivar = iact->vars[ivar_idx]; - const union value *val = case_data (c, ivar); - - if (( diff_idx != -1 && diff_idx <= ivar_idx) - || i == 0) - { - struct string str; - ds_init_empty (&str); - append_value_name (ivar, val, &str); - - tab_text (t, - 1 + ivar_idx, - heading_rows + v * rows_per_var + i * rows_per_cat, - TAT_TITLE | TAB_LEFT, - ds_cstr (&str) - ); - - ds_destroy (&str); - } - } + const struct exploratory_stats *es + = categoricals_get_user_data_by_category_real ( + cmd->cats, iact_idx, i); - if ( diff_idx != -1 && diff_idx < iact->n_vars) - { - tab_hline (t, TAL_1, 1 + diff_idx, nc - 1, - heading_rows + v * rows_per_var + i * rows_per_cat - ); - } + struct shapiro_wilk *sw = es[v].shapiro_wilk; - tab_text (t, heading_columns - 1, - heading_rows + v * rows_per_var + i * rows_per_cat, - TAT_TITLE | TAB_LEFT, - gettext (ptile_alg_desc [cmd->pc_alg])); + if (sw == NULL) + continue; - tukey_hinges_calculate (es->hinges, hinges); + double w = shapiro_wilk_calculate (sw); - for (p = 0; p < cmd->n_percentiles; ++p) - { - tab_double (t, heading_columns + p, - heading_rows + v * rows_per_var + i * rows_per_cat, - 0, - percentile_calculate (es->percentiles[p], cmd->pc_alg), - 0); - - if (cmd->ptiles[p] == 25.0) - { - tab_double (t, heading_columns + p, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, - hinges[0], - 0); - } - else if (cmd->ptiles[p] == 50.0) - { - tab_double (t, heading_columns + p, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, - hinges[1], - 0); - } - else if (cmd->ptiles[p] == 75.0) - { - tab_double (t, heading_columns + p, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, - hinges[2], - 0); - } - } + int j = 0; + indexes[0] = j; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (w)); - tab_text (t, heading_columns - 1, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - TAT_TITLE | TAB_LEFT, - _("Tukey's Hinges")); - - } + indexes[0] = ++j; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (sw->n)); - free (prev_vals); + indexes[0] = ++j; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (shapiro_wilk_significance (sw->n, w))); } } - tab_submit (t); + + free (indexes); + + pivot_table_submit (table); } + static void descriptives_report (const struct examine *cmd, int iact_idx) { - const struct interaction *iact = cmd->iacts[iact_idx]; - int i, v; - const int heading_columns = 1 + iact->n_vars + 2; - const int heading_rows = 1; - struct tab_table *t; - - size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); - - const int rows_per_cat = 13; - const int rows_per_var = n_cats * rows_per_cat; - - const int nr = heading_rows + cmd->n_dep_vars * rows_per_var; - const int nc = 2 + heading_columns; - - t = tab_create (nc, nr); - tab_title (t, _("Descriptives")); - - tab_headers (t, heading_columns, 0, heading_rows, 0); - - /* Internal Vertical lines */ - tab_box (t, -1, -1, -1, TAL_1, - heading_columns, 0, nc - 1, nr - 1); - - /* External Frame */ - tab_box (t, TAL_2, TAL_2, -1, -1, - 0, 0, nc - 1, nr - 1); - - tab_hline (t, TAL_2, 0, nc - 1, heading_rows); - tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + struct pivot_table *table = pivot_table_create (N_("Descriptives")); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Aspect"), + N_("Statistic"), N_("Std. Error")); + + struct pivot_dimension *statistics = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Statistics"), N_("Mean")); + struct pivot_category *interval = pivot_category_create_group__ ( + statistics->root, + pivot_value_new_text_format (N_("%g%% Confidence Interval for Mean"), + cmd->conf * 100.0)); + pivot_category_create_leaves (interval, N_("Lower Bound"), + N_("Upper Bound")); + pivot_category_create_leaves ( + statistics->root, N_("5% Trimmed Mean"), N_("Median"), N_("Variance"), + N_("Std. Deviation"), N_("Minimum"), N_("Maximum"), N_("Range"), + N_("Interquartile Range"), N_("Skewness"), N_("Kurtosis")); + const struct interaction *iact = cmd->iacts[iact_idx]; + struct pivot_footnote *missing_footnote = create_missing_footnote (table); + create_interaction_dimensions (table, cmd->cats, iact, missing_footnote); - tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE, - _("Statistic")); + struct pivot_dimension *dep_dim = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Dependent Variables")); - tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE, - _("Std. Error")); + size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); - for (i = 0; i < iact->n_vars; ++i) + size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + for (size_t v = 0; v < cmd->n_dep_vars; ++v) { - tab_text (t, - 1 + i, 0, - TAT_TITLE, - var_to_string (iact->vars[i]) - ); - } + indexes[table->n_dimensions - 1] = pivot_category_create_leaf ( + dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v])); - for (v = 0; v < cmd->n_dep_vars; ++v) - { - const union value **prev_val = previous_value_alloc (iact); - - int ivar_idx; - if ( v > 0 ) - tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var); - - tab_text (t, - 0, heading_rows + v * rows_per_var, - TAT_TITLE | TAB_LEFT, - var_to_string (cmd->dep_vars[v]) - ); - - for (i = 0; i < n_cats; ++i) + for (size_t i = 0; i < n_cats; ++i) { - const struct ccase *c = - categoricals_get_case_by_category_real (cmd->cats, - iact_idx, i); - - const struct exploratory_stats *ess = - categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); + for (size_t j = 0; j < iact->n_vars; j++) + { + int idx = categoricals_get_value_index_by_category_real ( + cmd->cats, iact_idx, i, j); + indexes[table->n_dimensions - 2 - j] = idx; + } + const struct exploratory_stats *ess + = categoricals_get_user_data_by_category_real (cmd->cats, + iact_idx, i); const struct exploratory_stats *es = ess + v; - const int diff_idx = previous_value_record (iact, c, prev_val); - double m0, m1, m2, m3, m4; - double tval; - moments_calculate (es->mom, &m0, &m1, &m2, &m3, &m4); + double tval = gsl_cdf_tdist_Qinv ((1.0 - cmd->conf) / 2.0, m0 - 1.0); - tval = gsl_cdf_tdist_Qinv ((1.0 - cmd->conf) / 2.0, m0 - 1.0); - - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) + struct entry { - const struct variable *ivar = iact->vars[ivar_idx]; - const union value *val = case_data (c, ivar); - - if (( diff_idx != -1 && diff_idx <= ivar_idx) - || i == 0) - { - struct string str; - ds_init_empty (&str); - append_value_name (ivar, val, &str); - - tab_text (t, - 1 + ivar_idx, - heading_rows + v * rows_per_var + i * rows_per_cat, - TAT_TITLE | TAB_LEFT, - ds_cstr (&str) - ); - - ds_destroy (&str); - } + int stat_idx; + int aspect_idx; + double x; } - - if ( diff_idx != -1 && diff_idx < iact->n_vars) + entries[] = { + { 0, 0, m1 }, + { 0, 1, calc_semean (m2, m0) }, + { 1, 0, m1 - tval * calc_semean (m2, m0) }, + { 2, 0, m1 + tval * calc_semean (m2, m0) }, + { 3, 0, trimmed_mean_calculate (es->trimmed_mean) }, + { 4, 0, percentile_calculate (es->quartiles[1], cmd->pc_alg) }, + { 5, 0, m2 }, + { 6, 0, sqrt (m2) }, + { 7, 0, es->minima[0].val }, + { 8, 0, es->maxima[0].val }, + { 9, 0, es->maxima[0].val - es->minima[0].val }, + { 10, 0, (percentile_calculate (es->quartiles[2], cmd->pc_alg) - + percentile_calculate (es->quartiles[0], cmd->pc_alg)) }, + { 11, 0, m3 }, + { 11, 1, calc_seskew (m0) }, + { 12, 0, m4 }, + { 12, 1, calc_sekurt (m0) }, + }; + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) { - tab_hline (t, TAL_1, 1 + diff_idx, nc - 1, - heading_rows + v * rows_per_var + i * rows_per_cat - ); + const struct entry *e = &entries[j]; + indexes[0] = e->aspect_idx; + indexes[1] = e->stat_idx; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (e->x)); } - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat, - TAB_LEFT, - _("Mean") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat, - 0, m1, 0); - - tab_double (t, - 1 + iact->n_vars + 3, - heading_rows + v * rows_per_var + i * rows_per_cat, - 0, calc_semean (m2, m0), 0); - - tab_text_format (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - TAB_LEFT, - _("%g%% Confidence Interval for Mean"), - cmd->conf * 100.0 - ); - - tab_text (t, - 1 + iact->n_vars + 1, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - TAB_LEFT, - _("Lower Bound") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, m1 - tval * calc_semean (m2, m0), 0); - - - tab_text (t, - 1 + iact->n_vars + 1, - heading_rows + v * rows_per_var + i * rows_per_cat + 2, - TAB_LEFT, - _("Upper Bound") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 2, - 0, m1 + tval * calc_semean (m2, m0), 0); - - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 3, - TAB_LEFT, - _("5% Trimmed Mean") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 3, - 0, - trimmed_mean_calculate (es->trimmed_mean), - 0); - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 4, - TAB_LEFT, - _("Median") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 4, - 0, - percentile_calculate (es->quartiles[1], cmd->pc_alg), - 0); - - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 5, - TAB_LEFT, - _("Variance") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 5, - 0, m2, 0); - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 6, - TAB_LEFT, - _("Std. Deviation") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 6, - 0, sqrt (m2), 0); - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 7, - TAB_LEFT, - _("Minimum") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 7, - 0, - es->minima[0].val, - 0); - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 8, - TAB_LEFT, - _("Maximum") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 8, - 0, - es->maxima[0].val, - 0); - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 9, - TAB_LEFT, - _("Range") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 9, - 0, - es->maxima[0].val - es->minima[0].val, - 0); - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 10, - TAB_LEFT, - _("Interquartile Range") - ); - - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 10, - 0, - percentile_calculate (es->quartiles[2], cmd->pc_alg) - - percentile_calculate (es->quartiles[0], cmd->pc_alg), - 0); - - - - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 11, - TAB_LEFT, - _("Skewness") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 11, - 0, m3, 0); - - tab_double (t, - 1 + iact->n_vars + 3, - heading_rows + v * rows_per_var + i * rows_per_cat + 11, - 0, calc_seskew (m0), 0); - - tab_text (t, - 1 + iact->n_vars, - heading_rows + v * rows_per_var + i * rows_per_cat + 12, - TAB_LEFT, - _("Kurtosis") - ); - - tab_double (t, - 1 + iact->n_vars + 2, - heading_rows + v * rows_per_var + i * rows_per_cat + 12, - 0, m4, 0); - - tab_double (t, - 1 + iact->n_vars + 3, - heading_rows + v * rows_per_var + i * rows_per_cat + 12, - 0, calc_sekurt (m0), 0); } - - free (prev_val); } - tab_submit (t); + + free (indexes); + + pivot_table_submit (table); } static void extremes_report (const struct examine *cmd, int iact_idx) { - const struct interaction *iact = cmd->iacts[iact_idx]; - int i, v; - const int heading_columns = 1 + iact->n_vars + 2; - const int heading_rows = 1; - struct tab_table *t; - - size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); - - const int rows_per_cat = 2 * cmd->disp_extremes; - const int rows_per_var = n_cats * rows_per_cat; - - const int nr = heading_rows + cmd->n_dep_vars * rows_per_var; - const int nc = 2 + heading_columns; - - t = tab_create (nc, nr); - tab_title (t, _("Extreme Values")); - - tab_headers (t, heading_columns, 0, heading_rows, 0); - - /* Internal Vertical lines */ - tab_box (t, -1, -1, -1, TAL_1, - heading_columns, 0, nc - 1, nr - 1); - - /* External Frame */ - tab_box (t, TAL_2, TAL_2, -1, -1, - 0, 0, nc - 1, nr - 1); - - tab_hline (t, TAL_2, 0, nc - 1, heading_rows); - tab_vline (t, TAL_2, heading_columns, 0, nr - 1); + struct pivot_table *table = pivot_table_create (N_("Extreme Values")); + + struct pivot_dimension *statistics = pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Statistics")); + pivot_category_create_leaf (statistics->root, + (cmd->id_var + ? pivot_value_new_variable (cmd->id_var) + : pivot_value_new_text (N_("Case Number")))); + pivot_category_create_leaves (statistics->root, N_("Value")); + + struct pivot_dimension *order = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Order")); + for (size_t i = 0; i < cmd->disp_extremes; i++) + pivot_category_create_leaf (order->root, pivot_value_new_integer (i + 1)); + + pivot_dimension_create (table, PIVOT_AXIS_ROW, + /* TRANSLATORS: This is a noun, not an adjective. */ + N_("Extreme"), + N_("Highest"), N_("Lowest")); + const struct interaction *iact = cmd->iacts[iact_idx]; + struct pivot_footnote *missing_footnote = create_missing_footnote (table); + create_interaction_dimensions (table, cmd->cats, iact, missing_footnote); - if ( cmd->id_var ) - tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE, - var_to_string (cmd->id_var)); - else - tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE, - _("Case Number")); + struct pivot_dimension *dep_dim = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Dependent Variables")); - tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE, - _("Value")); + size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); - for (i = 0; i < iact->n_vars; ++i) + size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + for (size_t v = 0; v < cmd->n_dep_vars; ++v) { - tab_text (t, - 1 + i, 0, - TAT_TITLE, - var_to_string (iact->vars[i]) - ); - } + indexes[table->n_dimensions - 1] = pivot_category_create_leaf ( + dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v])); - for (v = 0; v < cmd->n_dep_vars; ++v) - { - const union value **prev_val = previous_value_alloc (iact); - - int ivar_idx; - if ( v > 0 ) - tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var); - - tab_text (t, - 0, heading_rows + v * rows_per_var, - TAT_TITLE, - var_to_string (cmd->dep_vars[v]) - ); - - for (i = 0; i < n_cats; ++i) + for (size_t i = 0; i < n_cats; ++i) { - int e; - const struct ccase *c = - categoricals_get_case_by_category_real (cmd->cats, iact_idx, i); - - const struct exploratory_stats *ess = - categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); + for (size_t j = 0; j < iact->n_vars; j++) + { + int idx = categoricals_get_value_index_by_category_real ( + cmd->cats, iact_idx, i, j); + indexes[table->n_dimensions - 2 - j] = idx; + } + const struct exploratory_stats *ess + = categoricals_get_user_data_by_category_real (cmd->cats, + iact_idx, i); const struct exploratory_stats *es = ess + v; - int diff_idx = previous_value_record (iact, c, prev_val); - - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) + for (int e = 0 ; e < cmd->disp_extremes; ++e) { - const struct variable *ivar = iact->vars[ivar_idx]; - const union value *val = case_data (c, ivar); + indexes[1] = e; - if (( diff_idx != -1 && diff_idx <= ivar_idx) - || i == 0) - { - struct string str; - ds_init_empty (&str); - append_value_name (ivar, val, &str); - - tab_text (t, - 1 + ivar_idx, - heading_rows + v * rows_per_var + i * rows_per_cat, - TAT_TITLE | TAB_LEFT, - ds_cstr (&str) - ); - - ds_destroy (&str); + for (size_t j = 0; j < 2; j++) + { + const struct extremity *extremity + = j ? &es->minima[e] : &es->maxima[e]; + indexes[2] = j; + + indexes[0] = 0; + pivot_table_put ( + table, indexes, table->n_dimensions, + (cmd->id_var + ? new_value_with_missing_footnote (cmd->id_var, + &extremity->identity, + missing_footnote) + : pivot_value_new_integer (extremity->identity.f))); + + indexes[0] = 1; + union value val = { .f = extremity->val }; + pivot_table_put ( + table, indexes, table->n_dimensions, + new_value_with_missing_footnote (cmd->dep_vars[v], &val, + missing_footnote)); } } - - if ( diff_idx != -1 && diff_idx < iact->n_vars) - { - tab_hline (t, TAL_1, 1 + diff_idx, nc - 1, - heading_rows + v * rows_per_var + i * rows_per_cat - ); - } - - tab_text (t, - heading_columns - 2, - heading_rows + v * rows_per_var + i * rows_per_cat, - TAB_RIGHT, - _("Highest")); - - - tab_hline (t, TAL_1, heading_columns - 2, nc - 1, - heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes - ); - - tab_text (t, - heading_columns - 2, - heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes, - TAB_RIGHT, - _("Lowest")); - - for (e = 0 ; e < cmd->disp_extremes; ++e) - { - tab_double (t, - heading_columns - 1, - heading_rows + v * rows_per_var + i * rows_per_cat + e, - TAB_RIGHT, - e + 1, - &F_8_0); - - /* The casenumber */ - if (cmd->id_var) - tab_value (t, - heading_columns, - heading_rows + v * rows_per_var + i * rows_per_cat + e, - TAB_RIGHT, - &es->maxima[e].identity, - cmd->id_var, - NULL); - else - tab_double (t, - heading_columns, - heading_rows + v * rows_per_var + i * rows_per_cat + e, - TAB_RIGHT, - es->maxima[e].identity.f, - &F_8_0); - - tab_double (t, - heading_columns + 1, - heading_rows + v * rows_per_var + i * rows_per_cat + e, - 0, - es->maxima[e].val, - var_get_print_format (cmd->dep_vars[v])); - - - tab_double (t, - heading_columns - 1, - heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, - TAB_RIGHT, - e + 1, - &F_8_0); - - /* The casenumber */ - if (cmd->id_var) - tab_value (t, - heading_columns, - heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, - TAB_RIGHT, - &es->minima[e].identity, - cmd->id_var, - NULL); - else - tab_double (t, - heading_columns, - heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, - TAB_RIGHT, - es->minima[e].identity.f, - &F_8_0); - - tab_double (t, - heading_columns + 1, - heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, - 0, - es->minima[e].val, - var_get_print_format (cmd->dep_vars[v])); - } } - free (prev_val); } + free (indexes); - tab_submit (t); + pivot_table_submit (table); } static void summary_report (const struct examine *cmd, int iact_idx) { - const struct interaction *iact = cmd->iacts[iact_idx]; - int i, v; - const int heading_columns = 1 + iact->n_vars; - const int heading_rows = 3; - struct tab_table *t; - - const struct fmt_spec *wfmt = cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0; - - size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); - - const int nr = heading_rows + n_cats * cmd->n_dep_vars; - const int nc = 6 + heading_columns; - - t = tab_create (nc, nr); - tab_title (t, _("Case Processing Summary")); - - tab_headers (t, heading_columns, 0, heading_rows, 0); - - /* Internal Vertical lines */ - tab_box (t, -1, -1, -1, TAL_1, - heading_columns, 0, nc - 1, nr - 1); - - /* External Frame */ - tab_box (t, TAL_2, TAL_2, -1, -1, - 0, 0, nc - 1, nr - 1); - - tab_hline (t, TAL_2, 0, nc - 1, heading_rows); - tab_vline (t, TAL_2, heading_columns, 0, nr - 1); - - tab_joint_text (t, heading_columns, 0, - nc - 1, 0, TAB_CENTER | TAT_TITLE, _("Cases")); - tab_joint_text (t, - heading_columns, 1, - heading_columns + 1, 1, - TAB_CENTER | TAT_TITLE, _("Valid")); + struct pivot_table *table = pivot_table_create ( + N_("Case Processing Summary")); + pivot_table_set_weight_var (table, dict_get_weight (cmd->dict)); + + pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"), + N_("N"), PIVOT_RC_COUNT, + N_("Percent"), PIVOT_RC_PERCENT); + struct pivot_dimension *cases = pivot_dimension_create ( + table, PIVOT_AXIS_COLUMN, N_("Cases"), N_("Valid"), N_("Missing"), + N_("Total")); + cases->root->show_label = true; - tab_joint_text (t, - heading_columns + 2, 1, - heading_columns + 3, 1, - TAB_CENTER | TAT_TITLE, _("Missing")); + const struct interaction *iact = cmd->iacts[iact_idx]; + struct pivot_footnote *missing_footnote = create_missing_footnote (table); + create_interaction_dimensions (table, cmd->cats, iact, missing_footnote); - tab_joint_text (t, - heading_columns + 4, 1, - heading_columns + 5, 1, - TAB_CENTER | TAT_TITLE, _("Total")); + struct pivot_dimension *dep_dim = pivot_dimension_create ( + table, PIVOT_AXIS_ROW, N_("Dependent Variables")); - for (i = 0; i < 3; ++i) - { - tab_text (t, heading_columns + i * 2, 2, TAB_CENTER | TAT_TITLE, - _("N")); - tab_text (t, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE, - _("Percent")); - } + size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes); - for (i = 0; i < iact->n_vars; ++i) + size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + for (size_t v = 0; v < cmd->n_dep_vars; ++v) { - tab_text (t, - 1 + i, 2, - TAT_TITLE, - var_to_string (iact->vars[i]) - ); - } - - if (n_cats > 0) - for (v = 0; v < cmd->n_dep_vars; ++v) - { - int ivar_idx; - const union value **prev_values = previous_value_alloc (iact); - - if ( v > 0 ) - tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * n_cats); - - tab_text (t, - 0, heading_rows + n_cats * v, - TAT_TITLE, - var_to_string (cmd->dep_vars[v]) - ); - - - for (i = 0; i < n_cats; ++i) - { - double total; - const struct exploratory_stats *es; - - const struct ccase *c = - categoricals_get_case_by_category_real (cmd->cats, - iact_idx, i); - if (c) - { - int diff_idx = previous_value_record (iact, c, prev_values); - - if ( diff_idx != -1 && diff_idx < iact->n_vars - 1) - tab_hline (t, TAL_1, 1 + diff_idx, nc - 1, - heading_rows + n_cats * v + i ); - - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) - { - const struct variable *ivar = iact->vars[ivar_idx]; - const union value *val = case_data (c, ivar); - - if (( diff_idx != -1 && diff_idx <= ivar_idx) - || i == 0) - { - struct string str; - ds_init_empty (&str); - append_value_name (ivar, val, &str); - - tab_text (t, - 1 + ivar_idx, heading_rows + n_cats * v + i, - TAT_TITLE | TAB_LEFT, - ds_cstr (&str) - ); - - ds_destroy (&str); - } - } - } - - - es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); - - - total = es[v].missing + es[v].non_missing; - tab_double (t, - heading_columns + 0, - heading_rows + n_cats * v + i, - 0, - es[v].non_missing, - wfmt); - - - tab_text_format (t, - heading_columns + 1, - heading_rows + n_cats * v + i, - 0, - "%g%%", - 100.0 * es[v].non_missing / total - ); - - - tab_double (t, - heading_columns + 2, - heading_rows + n_cats * v + i, - 0, - es[v].missing, - wfmt); - - tab_text_format (t, - heading_columns + 3, - heading_rows + n_cats * v + i, - 0, - "%g%%", - 100.0 * es[v].missing / total - ); - tab_double (t, - heading_columns + 4, - heading_rows + n_cats * v + i, - 0, - total, - wfmt); - - /* This can only be 100% can't it? */ - tab_text_format (t, - heading_columns + 5, - heading_rows + n_cats * v + i, - 0, - "%g%%", - 100.0 * (es[v].missing + es[v].non_missing)/ total - ); - } - free (prev_values); - } - - tab_hline (t, TAL_1, heading_columns, nc - 1, 1); - tab_hline (t, TAL_1, heading_columns, nc - 1, 2); - - tab_submit (t); -} + indexes[table->n_dimensions - 1] = pivot_category_create_leaf ( + dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v])); + for (size_t i = 0; i < n_cats; ++i) + { + for (size_t j = 0; j < iact->n_vars; j++) + { + int idx = categoricals_get_value_index_by_category_real ( + cmd->cats, iact_idx, i, j); + indexes[table->n_dimensions - 2 - j] = idx; + } -/* Match a variable. - If the match succeeds, the variable will be placed in VAR. - Returns true if successful */ -static bool -lex_match_variable (struct lexer *lexer, - const struct dictionary *dict, const struct variable **var) -{ - if (lex_token (lexer) != T_ID) + const struct exploratory_stats *es + = categoricals_get_user_data_by_category_real ( + cmd->cats, iact_idx, i); - return false; + double total = es[v].missing + es[v].non_missing; + struct entry + { + int stat_idx; + int case_idx; + double x; + } + entries[] = { + { 0, 0, es[v].non_missing }, + { 1, 0, 100.0 * es[v].non_missing / total }, + { 0, 1, es[v].missing }, + { 1, 1, 100.0 * es[v].missing / total }, + { 0, 2, total }, + { 1, 2, 100.0 }, + }; + for (size_t j = 0; j < sizeof entries / sizeof *entries; j++) + { + const struct entry *e = &entries[j]; + indexes[0] = e->stat_idx; + indexes[1] = e->case_idx; + pivot_table_put (table, indexes, table->n_dimensions, + pivot_value_new_number (e->x)); + } + } + } - *var = parse_variable_const (lexer, dict); + free (indexes); - if ( *var == NULL) - return false; - return true; + pivot_table_submit (table); } /* Attempt to parse an interaction from LEXER */ @@ -1486,8 +990,8 @@ parse_interaction (struct lexer *lexer, struct examine *ex) { const struct variable *v = NULL; struct interaction *iact = NULL; - - if ( lex_match_variable (lexer, ex->dict, &v)) + + if (lex_match_variable (lexer, ex->dict, &v)) { iact = interaction_create (v); @@ -1502,7 +1006,7 @@ parse_interaction (struct lexer *lexer, struct examine *ex) } lex_match (lexer, T_COMMA); } - + return iact; } @@ -1511,7 +1015,7 @@ static void * create_n (const void *aux1, void *aux2 UNUSED) { int v; - + const struct examine *examine = aux1; struct exploratory_stats *es = pool_calloc (examine->pool, examine->n_dep_vars, sizeof (*es)); struct subcase ordering; @@ -1541,13 +1045,33 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, const struct examine *examine = aux1; struct exploratory_stats *es = user_data; + bool this_case_is_missing = false; + /* LISTWISE missing must be dealt with here */ + if (!examine->missing_pw) + { + for (v = 0; v < examine->n_dep_vars; v++) + { + const struct variable *var = examine->dep_vars[v]; + + if (var_is_value_missing (var, case_data (c, var)) + & examine->dep_excl) + { + es[v].missing += weight; + this_case_is_missing = true; + } + } + } + + if (this_case_is_missing) + return; + for (v = 0; v < examine->n_dep_vars; v++) { struct ccase *outcase ; const struct variable *var = examine->dep_vars[v]; - const double x = case_data (c, var)->f; - - if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) + const double x = case_num (c, var); + + if (var_is_value_missing (var, case_data (c, var)) & examine->dep_excl) { es[v].missing += weight; continue; @@ -1567,12 +1091,12 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, /* Save the value and the ID to the writer */ assert (examine->id_idx != -1); - case_data_rw_idx (outcase, EX_VAL)->f = x; + *case_num_rw_idx (outcase, EX_VAL) = x; value_copy (case_data_rw_idx (outcase, EX_ID), case_data_idx (c, examine->id_idx), examine->id_width); - case_data_rw_idx (outcase, EX_WT)->f = weight; - + *case_num_rw_idx (outcase, EX_WT) = weight; + es[v].cc += weight; if (es[v].cmin > weight) @@ -1593,28 +1117,26 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) { int i; casenumber imin = 0; - double imax = es[v].cc; + casenumber imax; struct casereader *reader; struct ccase *c; - casenumber total_cases; - if (examine->histogram) + if (examine->plot & PLOT_HISTOGRAM && es[v].non_missing > 0) { /* Sturges Rule */ double bin_width = fabs (es[v].minimum - es[v].maximum) / (1 + log2 (es[v].cc)) ; - bin_width = chart_rounded_tick (bin_width); - es[v].histogram = histogram_create (bin_width, es[v].minimum, es[v].maximum); } es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer); - total_cases = casereader_count_cases (es[v].sorted_reader); es[v].sorted_writer = NULL; + imax = casereader_get_n_cases (es[v].sorted_reader); + es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima)); es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima)); for (i = 0; i < examine->calc_extremes; ++i) @@ -1622,12 +1144,14 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ; value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ; } - + + bool warn = true; for (reader = casereader_clone (es[v].sorted_reader); (c = casereader_read (reader)) != NULL; case_unref (c)) { - const double val = case_data_idx (c, EX_VAL)->f; - const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */ + const double val = case_num_idx (c, EX_VAL); + double wt = case_num_idx (c, EX_WT); + wt = var_force_valid_weight (examine->wv, wt, &warn); moments_pass_two (es[v].mom, val, wt); @@ -1643,19 +1167,19 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) min->val = val; value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width); } - imin += wt; + imin ++; } - imax -= wt; + imax --; if (imax < examine->calc_extremes) { int x; - for (x = imax; x < imax + wt; ++x) + for (x = imax; x < imax + 1; ++x) { struct extremity *max; - if (x >= examine->calc_extremes) + if (x >= examine->calc_extremes) break; max = &es[v].maxima[x]; @@ -1666,20 +1190,20 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) } casereader_destroy (reader); - if (examine->calc_extremes > 0) + if (examine->calc_extremes > 0 && es[v].non_missing > 0) { assert (es[v].minima[0].val == es[v].minimum); - assert (es[v].maxima[0].val == es[v].maximum); + assert (es[v].maxima[0].val == es[v].maximum); } { const int n_os = 5 + examine->n_percentiles; - struct order_stats **os ; es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles)); es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05); + es[v].shapiro_wilk = NULL; - os = xcalloc (n_os, sizeof *os); + struct order_stats **os = XCALLOC (n_os, struct order_stats *); os[0] = &es[v].trimmed_mean->parent; es[v].quartiles[0] = percentile_create (0.25, es[v].cc); @@ -1706,11 +1230,11 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) free (os); } - if (examine->boxplot) + if (examine->plot & PLOT_BOXPLOT) { struct order_stats *os; - es[v].box_whisker = box_whisker_create (es[v].hinges, + es[v].box_whisker = box_whisker_create (es[v].hinges, EX_ID, examine->id_var); os = &es[v].box_whisker->parent; @@ -1719,13 +1243,30 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) EX_WT, EX_VAL); } - if (examine->npplot) + if (examine->plot) + { + double mean; + + moments_calculate (es[v].mom, NULL, &mean, NULL, NULL, NULL); + + es[v].shapiro_wilk = shapiro_wilk_create (es[v].non_missing, mean); + + if (es[v].shapiro_wilk) + { + struct order_stats *os = &es[v].shapiro_wilk->parent; + order_stats_accumulate_idx (&os, 1, + casereader_clone (es[v].sorted_reader), + EX_WT, EX_VAL); + } + } + + if (examine->plot & PLOT_NPPLOT) { double n, mean, var; struct order_stats *os; moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL); - + es[v].np = np_create (n, mean, var); os = &es[v].np->parent; @@ -1740,7 +1281,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) static void cleanup_exploratory_stats (struct examine *cmd) -{ +{ int i; for (i = 0; i < cmd->n_iacts; ++i) { @@ -1774,6 +1315,12 @@ cleanup_exploratory_stats (struct examine *cmd) stat->destroy (stat); } + if (es[v].shapiro_wilk) + { + stat = &es[v].shapiro_wilk->parent.parent; + stat->destroy (stat); + } + os = &es[v].trimmed_mean->parent; stat = &os->parent; stat->destroy (stat); @@ -1788,6 +1335,12 @@ cleanup_exploratory_stats (struct examine *cmd) statistic_destroy (&es[v].histogram->parent); moments_destroy (es[v].mom); + if (es[v].box_whisker) + { + stat = &es[v].box_whisker->parent.parent; + stat->destroy (stat); + } + casereader_destroy (es[v].sorted_reader); } } @@ -1805,37 +1358,26 @@ run_examine (struct examine *cmd, struct casereader *input) struct payload payload; payload.create = create_n; payload.update = update_n; - payload.destroy = calculate_n; - + payload.calculate = calculate_n; + payload.destroy = NULL; + cmd->wv = dict_get_weight (cmd->dict); cmd->cats - = categoricals_create (cmd->iacts, cmd->n_iacts, - cmd->wv, cmd->dep_excl, cmd->fctr_excl); + = categoricals_create (cmd->iacts, cmd->n_iacts, cmd->wv, cmd->fctr_excl); categoricals_set_payload (cmd->cats, &payload, cmd, NULL); - if (cmd->id_idx == -1) + if (cmd->id_var == NULL) { struct ccase *c = casereader_peek (input, 0); - assert (cmd->id_var == NULL); - - cmd->id_idx = case_get_value_cnt (c); + cmd->id_idx = case_get_n_values (c); input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); case_unref (c); } - /* Remove cases on a listwise basis if requested */ - if ( cmd->missing_pw == false) - input = casereader_create_filter_missing (input, - cmd->dep_vars, - cmd->n_dep_vars, - cmd->dep_excl, - NULL, - NULL); - for (reader = input; (c = casereader_read (reader)) != NULL; case_unref (c)) { @@ -1848,13 +1390,17 @@ run_examine (struct examine *cmd, struct casereader *input) { summary_report (cmd, i); + const size_t n_cats = categoricals_n_count (cmd->cats, i); + if (n_cats == 0) + continue; + if (cmd->disp_extremes > 0) extremes_report (cmd, i); if (cmd->n_percentiles > 0) percentiles_report (cmd, i); - if (cmd->boxplot) + if (cmd->plot & PLOT_BOXPLOT) { switch (cmd->boxplot_mode) { @@ -1870,17 +1416,20 @@ run_examine (struct examine *cmd, struct casereader *input) } } - if (cmd->histogram) + if (cmd->plot & PLOT_HISTOGRAM) show_histogram (cmd, i); - if (cmd->npplot) + if (cmd->plot & PLOT_NPPLOT) show_npplot (cmd, i); - if (cmd->spreadlevel) + if (cmd->plot & PLOT_SPREADLEVEL) show_spreadlevel (cmd, i); if (cmd->descriptives) descriptives_report (cmd, i); + + if (cmd->plot) + normality_report (cmd, i); } cleanup_exploratory_stats (cmd); @@ -1911,7 +1460,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.id_width = 0; examine.id_var = NULL; examine.boxplot_mode = BP_GROUPS; - + examine.ex_proto = caseproto_create (); examine.pool = pool_create (); @@ -1927,19 +1476,17 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.dep_excl = MV_ANY; examine.fctr_excl = MV_ANY; - examine.histogram = false; - examine.npplot = false; - examine.boxplot = false; - examine.spreadlevel = false; + examine.plot = 0; examine.sl_power = 0; - + examine.dep_vars = NULL; + examine.n_dep_vars = 0; examine.dict = dataset_dict (ds); /* Accept an optional, completely pointless "/VARIABLES=" */ lex_match (lexer, T_SLASH); if (lex_match_id (lexer, "VARIABLES")) { - if (! lex_force_match (lexer, T_EQUALS) ) + if (! lex_force_match (lexer, T_EQUALS)) goto error; } @@ -1957,11 +1504,11 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) if (iact) { examine.n_iacts++; - iacts_mem = + iacts_mem = pool_nrealloc (examine.pool, iacts_mem, examine.n_iacts, sizeof (*iacts_mem)); - + iacts_mem[examine.n_iacts - 1] = iact; } } @@ -1989,14 +1536,10 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) int extr = 5; if (lex_match (lexer, T_LPAREN)) { + if (!lex_force_int_range (lexer, "EXTREME", 0, INT_MAX)) + goto error; extr = lex_integer (lexer); - if (extr < 0) - { - msg (MW, _("%s may not be negative. Using default value (%g)."), "EXTREME", 5.0); - extr = 5; - } - lex_get (lexer); if (! lex_force_match (lexer, T_RPAREN)) goto error; @@ -2025,14 +1568,9 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { while (lex_is_number (lexer)) { + if (!lex_force_num_range_open (lexer, "PERCENTILES", 0, 100)) + goto error; double p = lex_number (lexer); - - if ( p <= 0 || p >= 100.0) - { - lex_error (lexer, - _("Percentiles must lie in the range (0, 100)")); - goto error; - } examine.n_percentiles++; examine.ptiles = @@ -2118,7 +1656,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "REPORT")) { - examine.fctr_excl = MV_NEVER; + examine.fctr_excl = 0; } else if (lex_match_id (lexer, "NOREPORT")) { @@ -2157,23 +1695,23 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "BOXPLOT")) { - examine.boxplot = true; + examine.plot |= PLOT_BOXPLOT; } else if (lex_match_id (lexer, "NPPLOT")) { - examine.npplot = true; + examine.plot |= PLOT_NPPLOT; } else if (lex_match_id (lexer, "HISTOGRAM")) { - examine.histogram = true; + examine.plot |= PLOT_HISTOGRAM; } else if (lex_match_id (lexer, "SPREADLEVEL")) { - examine.spreadlevel = true; + examine.plot |= PLOT_SPREADLEVEL; examine.sl_power = 0; - if (lex_match (lexer, T_LPAREN)) + if (lex_match (lexer, T_LPAREN) && lex_force_num (lexer)) { - examine.sl_power = lex_integer (lexer); + examine.sl_power = lex_number (lexer); lex_get (lexer); if (! lex_force_match (lexer, T_RPAREN)) @@ -2182,29 +1720,25 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "NONE")) { - examine.histogram = false; - examine.npplot = false; - examine.boxplot = false; + examine.plot = 0; } else if (lex_match (lexer, T_ALL)) { - examine.histogram = true; - examine.npplot = true; - examine.boxplot = true; + examine.plot = ~0; } - else + else { lex_error (lexer, NULL); goto error; } lex_match (lexer, T_COMMA); - } + } } else if (lex_match_id (lexer, "CINTERVAL")) { - if ( !lex_force_num (lexer)) + if (!lex_force_num (lexer)) goto error; - + examine.conf = lex_number (lexer); lex_get (lexer); } @@ -2222,15 +1756,15 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } - if ( totals_seen && nototals_seen) + if (totals_seen && nototals_seen) { - msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); + msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL"); goto error; } /* If totals have been requested or if there are no factors in this analysis, then the totals need to be included. */ - if ( !nototals_seen || examine.n_iacts == 1) + if (!nototals_seen || examine.n_iacts == 1) { examine.iacts = &iacts_mem[0]; } @@ -2238,10 +1772,11 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { examine.n_iacts--; examine.iacts = &iacts_mem[1]; + interaction_destroy (iacts_mem[0]); } - if ( examine.id_var ) + if (examine.id_var) { examine.id_idx = var_get_case_index (examine.id_var); examine.id_width = var_get_width (examine.id_var); @@ -2266,8 +1801,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) if (percentiles_seen && examine.n_percentiles == 0) { examine.n_percentiles = 7; - examine.ptiles = xcalloc (examine.n_percentiles, - sizeof (*examine.ptiles)); + examine.ptiles = xcalloc (examine.n_percentiles, sizeof (*examine.ptiles)); examine.ptiles[0] = 5; examine.ptiles[1] = 10; @@ -2283,7 +1817,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) struct casegrouper *grouper; struct casereader *group; bool ok; - + grouper = casegrouper_create_splits (proc_open (ds), examine.dict); while (casegrouper_get_next_group (grouper, &group)) run_examine (&examine, group); @@ -2295,7 +1829,6 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) for (i = 0; i < examine.n_iacts; ++i) interaction_destroy (examine.iacts[i]); - free (examine.ptiles); free (examine.dep_vars); pool_destroy (examine.pool);