X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.c;h=5d308f11abe15d287bb2507318fadd41faaaf766;hb=897c1977501bad5b68f7cc6248a8b950fe15048a;hp=780ecda158a009e4382b0bbe0ff9d2c477823445;hpb=b4966f4dbc35894e029deec0d49f279e55b40067;p=pspp diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 780ecda158..5d308f11ab 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -40,6 +40,7 @@ #include "math/interaction.h" #include "math/box-whisker.h" #include "math/categoricals.h" +#include "math/chart-geometry.h" #include "math/histogram.h" #include "math/moments.h" #include "math/np.h" @@ -51,6 +52,7 @@ #include "output/charts/boxplot.h" #include "output/charts/np-plot.h" +#include "output/charts/spreadlevel-plot.h" #include "output/charts/plot-hist.h" #include "language/command.h" @@ -64,6 +66,14 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid +static void +append_value_name (const struct variable *var, const union value *val, struct string *str) +{ + var_append_value_name (var, val, str); + if ( var_is_value_missing (var, val, MV_ANY)) + ds_put_cstr (str, _(" (missing)")); +} + enum bp_mode { BP_GROUPS, @@ -94,7 +104,8 @@ struct examine size_t n_iacts; struct interaction **iacts; - enum mv_class exclude; + enum mv_class dep_excl; + enum mv_class fctr_excl; const struct dictionary *dict; @@ -109,11 +120,9 @@ struct examine bool missing_pw; - /* Test options require that casenumbers are known */ - bool casenumbers; - /* The case index of the ID value (or -1) if not applicable */ size_t id_idx; + int id_width; enum pc_alg pc_alg; double *ptiles; @@ -122,6 +131,8 @@ struct examine bool npplot; bool histogram; bool boxplot; + bool spreadlevel; + int sl_power; enum bp_mode boxplot_mode; @@ -137,7 +148,7 @@ struct extremity /* Either the casenumber or the value of the variable specified by the /ID subcommand which corresponds to this extremity */ - double identity; + union value identity; }; struct exploratory_stats @@ -186,9 +197,10 @@ struct exploratory_stats }; -static -const union value ** -xxx0 (const struct interaction *iact) +/* Returns an array of (iact->n_vars) pointers to union value initialised to NULL. + The caller must free this array when no longer required. */ +static const union value ** +previous_value_alloc (const struct interaction *iact) { int ivar_idx; @@ -200,8 +212,9 @@ xxx0 (const struct interaction *iact) return prev_val; } +/* Set the contents of PREV_VAL to the values of C indexed by the variables of IACT */ static int -xxx1 (const struct interaction *iact, const struct ccase *c, const union value **prev_val) +previous_value_record (const struct interaction *iact, const struct ccase *c, const union value **prev_val) { int ivar_idx; int diff_idx = -1; @@ -296,7 +309,7 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -324,7 +337,7 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) double y_max = -DBL_MAX; const struct ccase *c = - categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp); + categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp); struct string title; ds_init_empty (&title); @@ -355,7 +368,7 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -422,7 +435,7 @@ show_npplot (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -454,6 +467,58 @@ show_npplot (const struct examine *cmd, int iact_idx) } } +static void +show_spreadlevel (const struct examine *cmd, int iact_idx) +{ + const struct interaction *iact = cmd->iacts[iact_idx]; + const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx); + + int v; + + /* Spreadlevel when there are no levels is not useful */ + if (iact->n_vars == 0) + return; + + for (v = 0; v < cmd->n_dep_vars; ++v) + { + int grp; + struct chart_item *sl; + + struct string label; + ds_init_cstr (&label, + var_to_string (cmd->dep_vars[v])); + + if (iact->n_vars > 0) + { + ds_put_cstr (&label, " ("); + interaction_to_string (iact, &label); + ds_put_cstr (&label, ")"); + } + + sl = spreadlevel_plot_create (ds_cstr (&label), cmd->sl_power); + + for (grp = 0; grp < n_cats; ++grp) + { + const struct exploratory_stats *es = + categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); + + double median = percentile_calculate (es[v].quartiles[1], cmd->pc_alg); + + double iqr = percentile_calculate (es[v].quartiles[2], cmd->pc_alg) - + percentile_calculate (es[v].quartiles[0], cmd->pc_alg); + + spreadlevel_plot_add (sl, iqr, median); + } + + if (sl == NULL) + msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label)); + else + chart_item_submit (sl); + + ds_destroy (&label); + } +} + static void show_histogram (const struct examine *cmd, int iact_idx) @@ -478,6 +543,10 @@ show_histogram (const struct examine *cmd, int iact_idx) categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); struct string label; + + if (es[v].histogram == NULL) + continue; + ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); @@ -491,7 +560,7 @@ show_histogram (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -574,119 +643,119 @@ percentiles_report (const struct examine *cmd, int iact_idx) if (n_cats > 0) { - tab_vline (t, TAL_1, heading_columns - 1, heading_rows, nr - 1); + tab_vline (t, TAL_1, heading_columns - 1, heading_rows, nr - 1); - for (v = 0; v < cmd->n_dep_vars; ++v) - { - const union value **prev_vals = xxx0 (iact); + for (v = 0; v < cmd->n_dep_vars; ++v) + { + const union value **prev_vals = previous_value_alloc (iact); - int ivar_idx; - if ( v > 0 ) - tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var); + int ivar_idx; + if ( v > 0 ) + tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var); - tab_text (t, - 0, heading_rows + v * rows_per_var, - TAT_TITLE | TAB_LEFT, - var_to_string (cmd->dep_vars[v]) - ); - - for (i = 0; i < n_cats; ++i) - { - const struct ccase *c = - categoricals_get_case_by_category_real (cmd->cats, - iact_idx, i); + tab_text (t, + 0, heading_rows + v * rows_per_var, + TAT_TITLE | TAB_LEFT, + var_to_string (cmd->dep_vars[v]) + ); - const struct exploratory_stats *ess = - categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); + for (i = 0; i < n_cats; ++i) + { + const struct ccase *c = + categoricals_get_case_by_category_real (cmd->cats, + iact_idx, i); - const struct exploratory_stats *es = ess + v; + const struct exploratory_stats *ess = + categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); - int diff_idx = xxx1 (iact, c, prev_vals); + const struct exploratory_stats *es = ess + v; - double hinges[3]; - int p; + int diff_idx = previous_value_record (iact, c, prev_vals); - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) - { - const struct variable *ivar = iact->vars[ivar_idx]; - const union value *val = case_data (c, ivar); + double hinges[3]; + int p; - if (( diff_idx != -1 && diff_idx <= ivar_idx) - || i == 0) - { - struct string str; - ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) + { + const struct variable *ivar = iact->vars[ivar_idx]; + const union value *val = case_data (c, ivar); + + if (( diff_idx != -1 && diff_idx <= ivar_idx) + || i == 0) + { + struct string str; + ds_init_empty (&str); + append_value_name (ivar, val, &str); - tab_text (t, - 1 + ivar_idx, - heading_rows + v * rows_per_var + i * rows_per_cat, - TAT_TITLE | TAB_LEFT, - ds_cstr (&str) - ); + tab_text (t, + 1 + ivar_idx, + heading_rows + v * rows_per_var + i * rows_per_cat, + TAT_TITLE | TAB_LEFT, + ds_cstr (&str) + ); - ds_destroy (&str); - } - } + ds_destroy (&str); + } + } - if ( diff_idx != -1 && diff_idx < iact->n_vars) - { - tab_hline (t, TAL_1, 1 + diff_idx, nc - 1, - heading_rows + v * rows_per_var + i * rows_per_cat - ); - } + if ( diff_idx != -1 && diff_idx < iact->n_vars) + { + tab_hline (t, TAL_1, 1 + diff_idx, nc - 1, + heading_rows + v * rows_per_var + i * rows_per_cat + ); + } - tab_text (t, heading_columns - 1, - heading_rows + v * rows_per_var + i * rows_per_cat, - TAT_TITLE | TAB_LEFT, - gettext (ptile_alg_desc [cmd->pc_alg])); + tab_text (t, heading_columns - 1, + heading_rows + v * rows_per_var + i * rows_per_cat, + TAT_TITLE | TAB_LEFT, + gettext (ptile_alg_desc [cmd->pc_alg])); - tukey_hinges_calculate (es->hinges, hinges); + tukey_hinges_calculate (es->hinges, hinges); - for (p = 0; p < cmd->n_percentiles; ++p) - { - tab_double (t, heading_columns + p, - heading_rows + v * rows_per_var + i * rows_per_cat, - 0, - percentile_calculate (es->percentiles[p], cmd->pc_alg), - 0); + for (p = 0; p < cmd->n_percentiles; ++p) + { + tab_double (t, heading_columns + p, + heading_rows + v * rows_per_var + i * rows_per_cat, + 0, + percentile_calculate (es->percentiles[p], cmd->pc_alg), + 0); - if (cmd->ptiles[p] == 25.0) - { - tab_double (t, heading_columns + p, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, - hinges[0], - 0); - } - else if (cmd->ptiles[p] == 50.0) - { - tab_double (t, heading_columns + p, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, - hinges[1], - 0); - } - else if (cmd->ptiles[p] == 75.0) - { - tab_double (t, heading_columns + p, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, - hinges[2], - 0); - } - } + if (cmd->ptiles[p] == 25.0) + { + tab_double (t, heading_columns + p, + heading_rows + v * rows_per_var + i * rows_per_cat + 1, + 0, + hinges[0], + 0); + } + else if (cmd->ptiles[p] == 50.0) + { + tab_double (t, heading_columns + p, + heading_rows + v * rows_per_var + i * rows_per_cat + 1, + 0, + hinges[1], + 0); + } + else if (cmd->ptiles[p] == 75.0) + { + tab_double (t, heading_columns + p, + heading_rows + v * rows_per_var + i * rows_per_cat + 1, + 0, + hinges[2], + 0); + } + } - tab_text (t, heading_columns - 1, - heading_rows + v * rows_per_var + i * rows_per_cat + 1, - TAT_TITLE | TAB_LEFT, - _("Tukey's Hinges")); + tab_text (t, heading_columns - 1, + heading_rows + v * rows_per_var + i * rows_per_cat + 1, + TAT_TITLE | TAB_LEFT, + _("Tukey's Hinges")); - } + } - free (prev_vals); - } + free (prev_vals); + } } tab_submit (t); } @@ -742,7 +811,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) for (v = 0; v < cmd->n_dep_vars; ++v) { - const union value **prev_val = xxx0 (iact); + const union value **prev_val = previous_value_alloc (iact); int ivar_idx; if ( v > 0 ) @@ -765,7 +834,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) const struct exploratory_stats *es = ess + v; - const int diff_idx = xxx1 (iact, c, prev_val); + const int diff_idx = previous_value_record (iact, c, prev_val); double m0, m1, m2, m3, m4; double tval; @@ -784,7 +853,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, @@ -1065,7 +1134,7 @@ extremes_report (const struct examine *cmd, int iact_idx) for (v = 0; v < cmd->n_dep_vars; ++v) { - const union value **prev_val = xxx0 (iact); + const union value **prev_val = previous_value_alloc (iact); int ivar_idx; if ( v > 0 ) @@ -1088,7 +1157,7 @@ extremes_report (const struct examine *cmd, int iact_idx) const struct exploratory_stats *es = ess + v; - int diff_idx = xxx1 (iact, c, prev_val); + int diff_idx = previous_value_record (iact, c, prev_val); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) { @@ -1100,7 +1169,7 @@ extremes_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, @@ -1147,22 +1216,29 @@ extremes_report (const struct examine *cmd, int iact_idx) &F_8_0); /* The casenumber */ - tab_double (t, + if (cmd->id_var) + tab_value (t, + heading_columns, + heading_rows + v * rows_per_var + i * rows_per_cat + e, + TAB_RIGHT, + &es->maxima[e].identity, + cmd->id_var, + NULL); + else + tab_double (t, heading_columns, - heading_rows + v * rows_per_var + i * rows_per_cat + e, - 0, - es->maxima[e].identity, - &F_8_0); - + heading_rows + v * rows_per_var + i * rows_per_cat + e, + TAB_RIGHT, + es->maxima[e].identity.f, + &F_8_0); tab_double (t, - heading_columns + 1, - heading_rows + v * rows_per_var + i * rows_per_cat + e, - 0, - es->maxima[e].val, - 0); - - + heading_columns + 1, + heading_rows + v * rows_per_var + i * rows_per_cat + e, + 0, + es->maxima[e].val, + var_get_print_format (cmd->dep_vars[v])); + tab_double (t, heading_columns - 1, @@ -1172,19 +1248,28 @@ extremes_report (const struct examine *cmd, int iact_idx) &F_8_0); /* The casenumber */ - tab_double (t, - heading_columns, - heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, - 0, - es->minima[e].identity, - &F_8_0); + if (cmd->id_var) + tab_value (t, + heading_columns, + heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, + TAB_RIGHT, + &es->minima[e].identity, + cmd->id_var, + NULL); + else + tab_double (t, + heading_columns, + heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, + TAB_RIGHT, + es->minima[e].identity.f, + &F_8_0); tab_double (t, heading_columns + 1, heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, 0, es->minima[e].val, - 0); + var_get_print_format (cmd->dep_vars[v])); } } free (prev_val); @@ -1261,114 +1346,114 @@ summary_report (const struct examine *cmd, int iact_idx) } if (n_cats > 0) - for (v = 0; v < cmd->n_dep_vars; ++v) - { - int ivar_idx; - const union value **prev_values = xxx0 (iact); - - if ( v > 0 ) - tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * n_cats); - - tab_text (t, - 0, heading_rows + n_cats * v, - TAT_TITLE, - var_to_string (cmd->dep_vars[v]) - ); - - - for (i = 0; i < n_cats; ++i) - { - double total; - const struct exploratory_stats *es; - - const struct ccase *c = - categoricals_get_case_by_category_real (cmd->cats, - iact_idx, i); - if (c) - { - int diff_idx = xxx1 (iact, c, prev_values); - - if ( diff_idx != -1 && diff_idx < iact->n_vars - 1) - tab_hline (t, TAL_1, 1 + diff_idx, nc - 1, - heading_rows + n_cats * v + i ); - - for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) - { - const struct variable *ivar = iact->vars[ivar_idx]; - const union value *val = case_data (c, ivar); - - if (( diff_idx != -1 && diff_idx <= ivar_idx) - || i == 0) - { - struct string str; - ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + for (v = 0; v < cmd->n_dep_vars; ++v) + { + int ivar_idx; + const union value **prev_values = previous_value_alloc (iact); + + if ( v > 0 ) + tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * n_cats); + + tab_text (t, + 0, heading_rows + n_cats * v, + TAT_TITLE, + var_to_string (cmd->dep_vars[v]) + ); + + + for (i = 0; i < n_cats; ++i) + { + double total; + const struct exploratory_stats *es; + + const struct ccase *c = + categoricals_get_case_by_category_real (cmd->cats, + iact_idx, i); + if (c) + { + int diff_idx = previous_value_record (iact, c, prev_values); + + if ( diff_idx != -1 && diff_idx < iact->n_vars - 1) + tab_hline (t, TAL_1, 1 + diff_idx, nc - 1, + heading_rows + n_cats * v + i ); + + for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) + { + const struct variable *ivar = iact->vars[ivar_idx]; + const union value *val = case_data (c, ivar); + + if (( diff_idx != -1 && diff_idx <= ivar_idx) + || i == 0) + { + struct string str; + ds_init_empty (&str); + append_value_name (ivar, val, &str); - tab_text (t, - 1 + ivar_idx, heading_rows + n_cats * v + i, - TAT_TITLE | TAB_LEFT, - ds_cstr (&str) - ); + tab_text (t, + 1 + ivar_idx, heading_rows + n_cats * v + i, + TAT_TITLE | TAB_LEFT, + ds_cstr (&str) + ); - ds_destroy (&str); - } - } - } + ds_destroy (&str); + } + } + } - es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); + es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); - total = es[v].missing + es[v].non_missing; - tab_double (t, - heading_columns + 0, - heading_rows + n_cats * v + i, - 0, - es[v].non_missing, - wfmt); - - - tab_text_format (t, - heading_columns + 1, - heading_rows + n_cats * v + i, - 0, - "%g%%", - 100.0 * es[v].non_missing / total - ); - - - tab_double (t, - heading_columns + 2, - heading_rows + n_cats * v + i, - 0, - es[v].missing, - wfmt); - - tab_text_format (t, - heading_columns + 3, - heading_rows + n_cats * v + i, - 0, - "%g%%", - 100.0 * es[v].missing / total - ); - tab_double (t, - heading_columns + 4, - heading_rows + n_cats * v + i, - 0, - total, - wfmt); - - /* This can only be 100% can't it? */ - tab_text_format (t, - heading_columns + 5, - heading_rows + n_cats * v + i, - 0, - "%g%%", - 100.0 * (es[v].missing + es[v].non_missing)/ total - ); - } - free (prev_values); - } + total = es[v].missing + es[v].non_missing; + tab_double (t, + heading_columns + 0, + heading_rows + n_cats * v + i, + 0, + es[v].non_missing, + wfmt); + + + tab_text_format (t, + heading_columns + 1, + heading_rows + n_cats * v + i, + 0, + "%g%%", + 100.0 * es[v].non_missing / total + ); + + + tab_double (t, + heading_columns + 2, + heading_rows + n_cats * v + i, + 0, + es[v].missing, + wfmt); + + tab_text_format (t, + heading_columns + 3, + heading_rows + n_cats * v + i, + 0, + "%g%%", + 100.0 * es[v].missing / total + ); + tab_double (t, + heading_columns + 4, + heading_rows + n_cats * v + i, + 0, + total, + wfmt); + + /* This can only be 100% can't it? */ + tab_text_format (t, + heading_columns + 5, + heading_rows + n_cats * v + i, + 0, + "%g%%", + 100.0 * (es[v].missing + es[v].non_missing)/ total + ); + } + free (prev_values); + } tab_hline (t, TAL_1, heading_columns, nc - 1, 1); tab_hline (t, TAL_1, heading_columns, nc - 1, 2); @@ -1376,25 +1461,6 @@ summary_report (const struct examine *cmd, int iact_idx) tab_submit (t); } - -/* Match a variable. - If the match succeeds, the variable will be placed in VAR. - Returns true if successful */ -static bool -lex_match_variable (struct lexer *lexer, - const struct dictionary *dict, const struct variable **var) -{ - if (lex_token (lexer) != T_ID) - - return false; - - *var = parse_variable_const (lexer, dict); - - if ( *var == NULL) - return false; - return true; -} - /* Attempt to parse an interaction from LEXER */ static struct interaction * parse_interaction (struct lexer *lexer, struct examine *ex) @@ -1458,16 +1524,17 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, for (v = 0; v < examine->n_dep_vars; v++) { + struct ccase *outcase ; const struct variable *var = examine->dep_vars[v]; const double x = case_data (c, var)->f; - if (var_is_value_missing (var, case_data (c, var), examine->exclude)) + if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) { es[v].missing += weight; continue; } - struct ccase *outcase = case_create (examine->ex_proto); + outcase = case_create (examine->ex_proto); if (x > es[v].maximum) es[v].maximum = x; @@ -1479,10 +1546,11 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, moments_pass_one (es[v].mom, x, weight); - /* Save the value and the casenumber to the writer */ + /* Save the value and the ID to the writer */ + assert (examine->id_idx != -1); case_data_rw_idx (outcase, EX_VAL)->f = x; - if ( examine->id_idx != -1) - case_data_rw_idx (outcase, EX_ID)->f = case_data_idx (c, examine->id_idx)->f; + value_copy (case_data_rw_idx (outcase, EX_ID), + case_data_idx (c, examine->id_idx), examine->id_width); case_data_rw_idx (outcase, EX_WT)->f = weight; @@ -1509,20 +1577,28 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) double imax = es[v].cc; struct casereader *reader; struct ccase *c; - casenumber total_cases; if (examine->histogram) { + /* Sturges Rule */ + double bin_width = fabs (es[v].minimum - es[v].maximum) + / (1 + log2 (es[v].cc)) + ; + es[v].histogram = - histogram_create (10, es[v].minimum, es[v].maximum); + histogram_create (bin_width, es[v].minimum, es[v].maximum); } es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer); - total_cases = casereader_count_cases (es[v].sorted_reader); es[v].sorted_writer = NULL; es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima)); es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima)); + for (i = 0; i < examine->calc_extremes; ++i) + { + value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ; + value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ; + } for (reader = casereader_clone (es[v].sorted_reader); (c = casereader_read (reader)) != NULL; case_unref (c)) @@ -1542,7 +1618,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) { struct extremity *min = &es[v].minima[x]; min->val = val; - min->identity = case_data_idx (c, EX_ID)->f; + value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width); } imin += wt; } @@ -1561,7 +1637,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) max = &es[v].maxima[x]; max->val = val; - max->identity = case_data_idx (c, EX_ID)->f; + value_copy (&max->identity, case_data_idx (c, EX_ID), examine->id_width); } } } @@ -1574,37 +1650,37 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) } { - const int n_os = 5 + examine->n_percentiles; - struct order_stats **os ; - es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles)); + const int n_os = 5 + examine->n_percentiles; + struct order_stats **os ; + es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles)); - es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05); + es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05); - os = xcalloc (n_os, sizeof *os); - os[0] = &es[v].trimmed_mean->parent; + os = xcalloc (n_os, sizeof *os); + os[0] = &es[v].trimmed_mean->parent; - es[v].quartiles[0] = percentile_create (0.25, es[v].cc); - es[v].quartiles[1] = percentile_create (0.5, es[v].cc); - es[v].quartiles[2] = percentile_create (0.75, es[v].cc); + es[v].quartiles[0] = percentile_create (0.25, es[v].cc); + es[v].quartiles[1] = percentile_create (0.5, es[v].cc); + es[v].quartiles[2] = percentile_create (0.75, es[v].cc); - os[1] = &es[v].quartiles[0]->parent; - os[2] = &es[v].quartiles[1]->parent; - os[3] = &es[v].quartiles[2]->parent; + os[1] = &es[v].quartiles[0]->parent; + os[2] = &es[v].quartiles[1]->parent; + os[3] = &es[v].quartiles[2]->parent; - es[v].hinges = tukey_hinges_create (es[v].cc, es[v].cmin); - os[4] = &es[v].hinges->parent; + es[v].hinges = tukey_hinges_create (es[v].cc, es[v].cmin); + os[4] = &es[v].hinges->parent; - for (i = 0; i < examine->n_percentiles; ++i) - { - es[v].percentiles[i] = percentile_create (examine->ptiles[i] / 100.00, es[v].cc); - os[5 + i] = &es[v].percentiles[i]->parent; - } + for (i = 0; i < examine->n_percentiles; ++i) + { + es[v].percentiles[i] = percentile_create (examine->ptiles[i] / 100.00, es[v].cc); + os[5 + i] = &es[v].percentiles[i]->parent; + } - order_stats_accumulate_idx (os, n_os, - casereader_clone (es[v].sorted_reader), - EX_WT, EX_VAL); + order_stats_accumulate_idx (os, n_os, + casereader_clone (es[v].sorted_reader), + EX_WT, EX_VAL); - free (os); + free (os); } if (examine->boxplot) @@ -1612,12 +1688,12 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) struct order_stats *os; es[v].box_whisker = box_whisker_create (es[v].hinges, - EX_ID); + EX_ID, examine->id_var); os = &es[v].box_whisker->parent; order_stats_accumulate_idx (&os, 1, - casereader_clone (es[v].sorted_reader), - EX_WT, EX_VAL); + casereader_clone (es[v].sorted_reader), + EX_WT, EX_VAL); } if (examine->npplot) @@ -1632,8 +1708,8 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) os = &es[v].np->parent; order_stats_accumulate_idx (&os, 1, - casereader_clone (es[v].sorted_reader), - EX_WT, EX_VAL); + casereader_clone (es[v].sorted_reader), + EX_WT, EX_VAL); } } @@ -1657,42 +1733,36 @@ cleanup_exploratory_stats (struct examine *cmd) const struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, i, grp); - struct order_stats *os = es[v].hinges; + struct order_stats *os = &es[v].hinges->parent; struct statistic *stat = &os->parent; stat->destroy (stat); for (q = 0; q < 3 ; q++) { - os = es[v].quartiles[q]; + os = &es[v].quartiles[q]->parent; stat = &os->parent; stat->destroy (stat); } for (q = 0; q < cmd->n_percentiles ; q++) { - os = es[v].percentiles[q]; + os = &es[v].percentiles[q]->parent; stat = &os->parent; stat->destroy (stat); } - os = es[v].trimmed_mean; + os = &es[v].trimmed_mean->parent; stat = &os->parent; stat->destroy (stat); - os = es[v].np; - if (os) - { - stat = &os->parent; - stat->destroy (stat); - } - - os = es[v].histogram; + os = &es[v].np->parent; if (os) { stat = &os->parent; stat->destroy (stat); } + statistic_destroy (&es[v].histogram->parent); moments_destroy (es[v].mom); casereader_destroy (es[v].sorted_reader); @@ -1712,40 +1782,35 @@ run_examine (struct examine *cmd, struct casereader *input) struct payload payload; payload.create = create_n; payload.update = update_n; - payload.destroy = calculate_n; + payload.calculate = calculate_n; + payload.destroy = NULL; cmd->wv = dict_get_weight (cmd->dict); - cmd->id_idx = -1; cmd->cats = categoricals_create (cmd->iacts, cmd->n_iacts, - cmd->wv, cmd->exclude); + cmd->wv, cmd->dep_excl, cmd->fctr_excl); categoricals_set_payload (cmd->cats, &payload, cmd, NULL); - if (cmd->casenumbers) + if (cmd->id_idx == -1) { struct ccase *c = casereader_peek (input, 0); - if (cmd->id_var) - cmd->id_idx = var_get_case_index (cmd->id_var); - else - { - cmd->id_idx = case_get_value_cnt (c); - input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); - } + assert (cmd->id_var == NULL); + + cmd->id_idx = case_get_value_cnt (c); + input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); case_unref (c); } - /* FIXME: Filter out missing factor variables */ - /* Remove cases on a listwise basis if requested */ if ( cmd->missing_pw == false) input = casereader_create_filter_missing (input, cmd->dep_vars, cmd->n_dep_vars, - cmd->exclude, + cmd->dep_excl, NULL, NULL); @@ -1789,6 +1854,9 @@ run_examine (struct examine *cmd, struct casereader *input) if (cmd->npplot) show_npplot (cmd, i); + if (cmd->spreadlevel) + show_spreadlevel (cmd, i); + if (cmd->descriptives) descriptives_report (cmd, i); } @@ -1809,7 +1877,6 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) struct examine examine; bool percentiles_seen = false; - examine.casenumbers = false; examine.missing_pw = false; examine.disp_extremes = 0; examine.calc_extremes = 0; @@ -1818,13 +1885,12 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.pc_alg = PC_HAVERAGE; examine.ptiles = NULL; examine.n_percentiles = 0; - examine.id_var = 0; + examine.id_idx = -1; + examine.id_width = 0; + examine.id_var = NULL; examine.boxplot_mode = BP_GROUPS; examine.ex_proto = caseproto_create (); - examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */ - examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* id */ - examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */ examine.pool = pool_create (); @@ -1837,10 +1903,13 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *)); examine.iacts[0] = interaction_create (NULL); - examine.exclude = MV_ANY; + examine.dep_excl = MV_ANY; + examine.fctr_excl = MV_ANY; examine.histogram = false; examine.npplot = false; examine.boxplot = false; + examine.spreadlevel = false; + examine.sl_power = 0; examine.dict = dataset_dict (ds); @@ -2019,11 +2088,19 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "EXCLUDE")) { - examine.exclude = MV_ANY; + examine.dep_excl = MV_ANY; } else if (lex_match_id (lexer, "INCLUDE")) { - examine.exclude = MV_SYSTEM; + examine.dep_excl = MV_SYSTEM; + } + else if (lex_match_id (lexer, "REPORT")) + { + examine.fctr_excl = MV_NEVER; + } + else if (lex_match_id (lexer, "NOREPORT")) + { + examine.fctr_excl = MV_ANY; } else { @@ -2068,6 +2145,19 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { examine.histogram = true; } + else if (lex_match_id (lexer, "SPREADLEVEL")) + { + examine.spreadlevel = true; + examine.sl_power = 0; + if (lex_match (lexer, T_LPAREN)) + { + examine.sl_power = lex_integer (lexer); + + lex_get (lexer); + if (! lex_force_match (lexer, T_RPAREN)) + goto error; + } + } else if (lex_match_id (lexer, "NONE")) { examine.histogram = false; @@ -2109,6 +2199,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } } + if ( totals_seen && nototals_seen) { msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); @@ -2125,21 +2216,26 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { examine.n_iacts--; examine.iacts = &iacts_mem[1]; + interaction_destroy (iacts_mem[0]); } - if (examine.disp_extremes > 0) + if ( examine.id_var ) { - examine.calc_extremes = examine.disp_extremes; - examine.casenumbers = true; + examine.id_idx = var_get_case_index (examine.id_var); + examine.id_width = var_get_width (examine.id_var); } - if (examine.boxplot) + examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */ + examine.ex_proto = caseproto_add_width (examine.ex_proto, examine.id_width); /* id */ + examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */ + + + if (examine.disp_extremes > 0) { - examine.casenumbers = true; + examine.calc_extremes = examine.disp_extremes; } - if (examine.descriptives && examine.calc_extremes == 0) { /* Descriptives always displays the max and min */ @@ -2176,9 +2272,6 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) caseproto_unref (examine.ex_proto); - for (i = 0; i < examine.n_iacts; ++i) - interaction_destroy (examine.iacts[i]); - free (examine.ptiles); free (examine.dep_vars); pool_destroy (examine.pool); @@ -2187,6 +2280,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) error: caseproto_unref (examine.ex_proto); + examine.iacts = iacts_mem; for (i = 0; i < examine.n_iacts; ++i) interaction_destroy (examine.iacts[i]); free (examine.dep_vars);