X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.c;h=b8c518c60bc729f95edfa16edf4a985133032cd8;hb=3dd0f6ae0d5eb73a2270a243e443c4ae03c2c16e;hp=46b27f9d360ac34b1161a69de910aadb18eaba43;hpb=ab057ea64a9d4f712a537d183bbdc04d2bb24fbe;p=pspp diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 46b27f9d36..b8c518c60b 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -1,7 +1,7 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012 Free Software Foundation, Inc. - + Copyright (C) 2012, 2013, 2016 Free Software Foundation, Inc. + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or @@ -11,7 +11,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see . */ @@ -66,7 +66,7 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid -static void +static void append_value_name (const struct variable *var, const union value *val, struct string *str) { var_append_value_name (var, val, str); @@ -127,11 +127,11 @@ struct examine enum pc_alg pc_alg; double *ptiles; size_t n_percentiles; - + bool npplot; - bool histogram; + bool histogramplot; bool boxplot; - bool spreadlevel; + bool spreadlevelplot; int sl_power; enum bp_mode boxplot_mode; @@ -165,7 +165,7 @@ struct exploratory_stats struct extremity *minima; struct extremity *maxima; - /* + /* Minimum should alway equal mimima[0].val. Likewise, maximum should alway equal maxima[0].val. This redundancy exists as an optimisation effort. @@ -224,7 +224,7 @@ previous_value_record (const struct interaction *iact, const struct ccase *c, co const struct variable *ivar = iact->vars[ivar_idx]; const int width = var_get_width (ivar); const union value *val = case_data (c, ivar); - + if (prev_val[ivar_idx]) if (! value_equal (prev_val[ivar_idx], val, width)) { @@ -237,7 +237,7 @@ previous_value_record (const struct interaction *iact, const struct ccase *c, co { const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - + prev_val[ivar_idx] = val; } return diff_idx; @@ -273,7 +273,7 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) } else ds_put_format (&title, _("Boxplot of %s"), var_to_string (cmd->dep_vars[v])); - + for (grp = 0; grp < n_cats; ++grp) { const struct exploratory_stats *es = @@ -285,7 +285,7 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) if ( y_max < es[v].maximum) y_max = es[v].maximum; } - + boxplot = boxplot_create (y_min, y_max, ds_cstr (&title)); ds_destroy (&title); @@ -298,26 +298,33 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) const struct ccase *c = categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp); - const struct exploratory_stats *es = + struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); ds_init_empty (&label); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) { + struct string l; const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - - ds_put_cstr (&label, var_to_string (ivar)); - ds_put_cstr (&label, " = "); - append_value_name (ivar, val, &label); - ds_put_cstr (&label, "; "); + ds_init_empty (&l); + + append_value_name (ivar, val, &l); + ds_ltrim (&l, ss_cstr (" ")); + + ds_put_substring (&label, l.ss); + if (ivar_idx < iact->n_vars - 1) + ds_put_cstr (&label, "; "); + + ds_destroy (&l); } boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label)); + es[v].box_whisker = NULL; ds_destroy (&label); } - + boxplot_submit (boxplot); } } @@ -365,7 +372,7 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) { const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - + ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); append_value_name (ivar, val, &label); @@ -384,11 +391,12 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) for (v = 0; v < cmd->n_dep_vars; ++v) { - const struct exploratory_stats *es = + struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); - boxplot_add_box (boxplot, es[v].box_whisker, + boxplot_add_box (boxplot, es[v].box_whisker, var_to_string (cmd->dep_vars[v])); + es[v].box_whisker = NULL; } boxplot_submit (boxplot); @@ -422,7 +430,7 @@ show_npplot (const struct examine *cmd, int iact_idx) categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); struct string label; - ds_init_cstr (&label, + ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); if ( iact->n_vars > 0) @@ -432,16 +440,16 @@ show_npplot (const struct examine *cmd, int iact_idx) { const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - + ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); - + } ds_put_cstr (&label, ")"); } - + np = es[v].np; reader = casewriter_make_reader (np->writer); np->writer = NULL; @@ -485,7 +493,7 @@ show_spreadlevel (const struct examine *cmd, int iact_idx) struct chart_item *sl; struct string label; - ds_init_cstr (&label, + ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); if (iact->n_vars > 0) @@ -494,7 +502,7 @@ show_spreadlevel (const struct examine *cmd, int iact_idx) interaction_to_string (iact, &label); ds_put_cstr (&label, ")"); } - + sl = spreadlevel_plot_create (ds_cstr (&label), cmd->sl_power); for (grp = 0; grp < n_cats; ++grp) @@ -512,7 +520,7 @@ show_spreadlevel (const struct examine *cmd, int iact_idx) if (sl == NULL) msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label)); - else + else chart_item_submit (sl); ds_destroy (&label); @@ -547,7 +555,7 @@ show_histogram (const struct examine *cmd, int iact_idx) if (es[v].histogram == NULL) continue; - ds_init_cstr (&label, + ds_init_cstr (&label, var_to_string (cmd->dep_vars[v])); if ( iact->n_vars > 0) @@ -557,12 +565,12 @@ show_histogram (const struct examine *cmd, int iact_idx) { const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - + ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); - + } ds_put_cstr (&label, ")"); } @@ -575,7 +583,7 @@ show_histogram (const struct examine *cmd, int iact_idx) ds_cstr (&label), n, mean, sqrt (var), false)); - + ds_destroy (&label); } } @@ -599,6 +607,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) const int nc = heading_columns + cmd->n_percentiles; t = tab_create (nc, nr); + tab_title (t, _("Percentiles")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -652,7 +661,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) int ivar_idx; if ( v > 0 ) tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var); - + tab_text (t, 0, heading_rows + v * rows_per_var, TAT_TITLE | TAB_LEFT, @@ -682,18 +691,18 @@ percentiles_report (const struct examine *cmd, int iact_idx) if (( diff_idx != -1 && diff_idx <= ivar_idx) || i == 0) - { + { struct string str; ds_init_empty (&str); append_value_name (ivar, val, &str); - + tab_text (t, 1 + ivar_idx, heading_rows + v * rows_per_var + i * rows_per_cat, TAT_TITLE | TAB_LEFT, ds_cstr (&str) ); - + ds_destroy (&str); } } @@ -705,7 +714,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) ); } - tab_text (t, heading_columns - 1, + tab_text (t, heading_columns - 1, heading_rows + v * rows_per_var + i * rows_per_cat, TAT_TITLE | TAB_LEFT, gettext (ptile_alg_desc [cmd->pc_alg])); @@ -714,44 +723,44 @@ percentiles_report (const struct examine *cmd, int iact_idx) for (p = 0; p < cmd->n_percentiles; ++p) { - tab_double (t, heading_columns + p, + tab_double (t, heading_columns + p, heading_rows + v * rows_per_var + i * rows_per_cat, 0, percentile_calculate (es->percentiles[p], cmd->pc_alg), - 0); - + NULL, RC_OTHER); + if (cmd->ptiles[p] == 25.0) { - tab_double (t, heading_columns + p, + tab_double (t, heading_columns + p, heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[0], - 0); + NULL, RC_OTHER); } else if (cmd->ptiles[p] == 50.0) { - tab_double (t, heading_columns + p, + tab_double (t, heading_columns + p, heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[1], - 0); + NULL, RC_OTHER); } else if (cmd->ptiles[p] == 75.0) { - tab_double (t, heading_columns + p, + tab_double (t, heading_columns + p, heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[2], - 0); + NULL, RC_OTHER); } } - tab_text (t, heading_columns - 1, + tab_text (t, heading_columns - 1, heading_rows + v * rows_per_var + i * rows_per_cat + 1, TAT_TITLE | TAB_LEFT, _("Tukey's Hinges")); - + } free (prev_vals); @@ -778,6 +787,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) const int nc = 2 + heading_columns; t = tab_create (nc, nr); + tab_title (t, _("Descriptives")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -816,7 +826,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) int ivar_idx; if ( v > 0 ) tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var); - + tab_text (t, 0, heading_rows + v * rows_per_var, TAT_TITLE | TAB_LEFT, @@ -850,18 +860,18 @@ descriptives_report (const struct examine *cmd, int iact_idx) if (( diff_idx != -1 && diff_idx <= ivar_idx) || i == 0) - { + { struct string str; ds_init_empty (&str); append_value_name (ivar, val, &str); - + tab_text (t, 1 + ivar_idx, heading_rows + v * rows_per_var + i * rows_per_cat, TAT_TITLE | TAB_LEFT, ds_cstr (&str) ); - + ds_destroy (&str); } } @@ -883,12 +893,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat, - 0, m1, 0); + 0, m1, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat, - 0, calc_semean (m2, m0), 0); + 0, calc_semean (m2, m0), NULL, RC_OTHER); tab_text_format (t, 1 + iact->n_vars, @@ -897,7 +907,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) _("%g%% Confidence Interval for Mean"), cmd->conf * 100.0 ); - + tab_text (t, 1 + iact->n_vars + 1, heading_rows + v * rows_per_var + i * rows_per_cat + 1, @@ -908,7 +918,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, m1 - tval * calc_semean (m2, m0), 0); + 0, m1 - tval * calc_semean (m2, m0), NULL, RC_OTHER); tab_text (t, @@ -921,7 +931,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 2, - 0, m1 + tval * calc_semean (m2, m0), 0); + 0, m1 + tval * calc_semean (m2, m0), NULL, RC_OTHER); tab_text (t, @@ -936,7 +946,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 3, 0, trimmed_mean_calculate (es->trimmed_mean), - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -944,13 +954,13 @@ descriptives_report (const struct examine *cmd, int iact_idx) TAB_LEFT, _("Median") ); - + tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 4, 0, percentile_calculate (es->quartiles[1], cmd->pc_alg), - 0); + NULL, RC_OTHER); tab_text (t, @@ -963,7 +973,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 5, - 0, m2, 0); + 0, m2, NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -975,7 +985,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 6, - 0, sqrt (m2), 0); + 0, sqrt (m2), NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -987,9 +997,9 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 7, - 0, + 0, es->minima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1001,9 +1011,9 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 8, - 0, + 0, es->maxima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1015,9 +1025,9 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 9, - 0, + 0, es->maxima[0].val - es->minima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1031,9 +1041,9 @@ descriptives_report (const struct examine *cmd, int iact_idx) 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 10, 0, - percentile_calculate (es->quartiles[2], cmd->pc_alg) - + percentile_calculate (es->quartiles[2], cmd->pc_alg) - percentile_calculate (es->quartiles[0], cmd->pc_alg), - 0); + NULL, RC_OTHER); @@ -1048,12 +1058,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 11, - 0, m3, 0); + 0, m3, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat + 11, - 0, calc_seskew (m0), 0); + 0, calc_seskew (m0), NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1065,12 +1075,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 12, - 0, m4, 0); + 0, m4, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat + 12, - 0, calc_sekurt (m0), 0); + 0, calc_sekurt (m0), NULL, RC_OTHER); } free (prev_val); @@ -1097,6 +1107,7 @@ extremes_report (const struct examine *cmd, int iact_idx) const int nc = 2 + heading_columns; t = tab_create (nc, nr); + tab_title (t, _("Extreme Values")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -1113,7 +1124,7 @@ extremes_report (const struct examine *cmd, int iact_idx) tab_vline (t, TAL_2, heading_columns, 0, nr - 1); - if ( cmd->id_var ) + if ( cmd->id_var ) tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE, var_to_string (cmd->id_var)); else @@ -1139,7 +1150,7 @@ extremes_report (const struct examine *cmd, int iact_idx) int ivar_idx; if ( v > 0 ) tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var); - + tab_text (t, 0, heading_rows + v * rows_per_var, TAT_TITLE, @@ -1166,18 +1177,18 @@ extremes_report (const struct examine *cmd, int iact_idx) if (( diff_idx != -1 && diff_idx <= ivar_idx) || i == 0) - { + { struct string str; ds_init_empty (&str); append_value_name (ivar, val, &str); - + tab_text (t, 1 + ivar_idx, heading_rows + v * rows_per_var + i * rows_per_cat, TAT_TITLE | TAB_LEFT, ds_cstr (&str) ); - + ds_destroy (&str); } } @@ -1188,7 +1199,7 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat ); } - + tab_text (t, heading_columns - 2, heading_rows + v * rows_per_var + i * rows_per_cat, @@ -1213,7 +1224,7 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + e, TAB_RIGHT, e + 1, - &F_8_0); + NULL, RC_INTEGER); /* The casenumber */ if (cmd->id_var) @@ -1224,28 +1235,28 @@ extremes_report (const struct examine *cmd, int iact_idx) &es->maxima[e].identity, cmd->id_var, NULL); - else + else tab_double (t, heading_columns, heading_rows + v * rows_per_var + i * rows_per_cat + e, TAB_RIGHT, es->maxima[e].identity.f, - &F_8_0); + NULL, RC_INTEGER); tab_double (t, heading_columns + 1, heading_rows + v * rows_per_var + i * rows_per_cat + e, 0, es->maxima[e].val, - var_get_print_format (cmd->dep_vars[v])); - + var_get_print_format (cmd->dep_vars[v]), RC_OTHER); + tab_double (t, heading_columns - 1, heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, TAB_RIGHT, e + 1, - &F_8_0); + NULL, RC_INTEGER); /* The casenumber */ if (cmd->id_var) @@ -1262,14 +1273,14 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, TAB_RIGHT, es->minima[e].identity.f, - &F_8_0); + NULL, RC_INTEGER); tab_double (t, heading_columns + 1, heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, 0, es->minima[e].val, - var_get_print_format (cmd->dep_vars[v])); + var_get_print_format (cmd->dep_vars[v]), RC_OTHER); } } free (prev_val); @@ -1296,6 +1307,7 @@ summary_report (const struct examine *cmd, int iact_idx) const int nc = 6 + heading_columns; t = tab_create (nc, nr); + tab_set_format (t, RC_WEIGHT, wfmt); tab_title (t, _("Case Processing Summary")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -1319,7 +1331,7 @@ summary_report (const struct examine *cmd, int iact_idx) TAB_CENTER | TAT_TITLE, _("Valid")); tab_joint_text (t, - heading_columns + 2, 1, + heading_columns + 2, 1, heading_columns + 3, 1, TAB_CENTER | TAT_TITLE, _("Missing")); @@ -1384,17 +1396,17 @@ summary_report (const struct examine *cmd, int iact_idx) if (( diff_idx != -1 && diff_idx <= ivar_idx) || i == 0) - { + { struct string str; ds_init_empty (&str); append_value_name (ivar, val, &str); - + tab_text (t, 1 + ivar_idx, heading_rows + n_cats * v + i, TAT_TITLE | TAB_LEFT, ds_cstr (&str) ); - + ds_destroy (&str); } } @@ -1402,18 +1414,18 @@ summary_report (const struct examine *cmd, int iact_idx) es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i); - - + + total = es[v].missing + es[v].non_missing; - tab_double (t, + tab_double (t, heading_columns + 0, heading_rows + n_cats * v + i, 0, es[v].non_missing, - wfmt); + NULL, RC_WEIGHT); - tab_text_format (t, + tab_text_format (t, heading_columns + 1, heading_rows + n_cats * v + i, 0, @@ -1422,29 +1434,29 @@ summary_report (const struct examine *cmd, int iact_idx) ); - tab_double (t, + tab_double (t, heading_columns + 2, heading_rows + n_cats * v + i, 0, es[v].missing, - wfmt); + NULL, RC_WEIGHT); - tab_text_format (t, + tab_text_format (t, heading_columns + 3, heading_rows + n_cats * v + i, 0, "%g%%", 100.0 * es[v].missing / total ); - tab_double (t, + tab_double (t, heading_columns + 4, heading_rows + n_cats * v + i, 0, total, - wfmt); + NULL, RC_WEIGHT); /* This can only be 100% can't it? */ - tab_text_format (t, + tab_text_format (t, heading_columns + 5, heading_rows + n_cats * v + i, 0, @@ -1461,32 +1473,13 @@ summary_report (const struct examine *cmd, int iact_idx) tab_submit (t); } - -/* Match a variable. - If the match succeeds, the variable will be placed in VAR. - Returns true if successful */ -static bool -lex_match_variable (struct lexer *lexer, - const struct dictionary *dict, const struct variable **var) -{ - if (lex_token (lexer) != T_ID) - - return false; - - *var = parse_variable_const (lexer, dict); - - if ( *var == NULL) - return false; - return true; -} - /* Attempt to parse an interaction from LEXER */ static struct interaction * parse_interaction (struct lexer *lexer, struct examine *ex) { const struct variable *v = NULL; struct interaction *iact = NULL; - + if ( lex_match_variable (lexer, ex->dict, &v)) { iact = interaction_create (v); @@ -1502,7 +1495,7 @@ parse_interaction (struct lexer *lexer, struct examine *ex) } lex_match (lexer, T_COMMA); } - + return iact; } @@ -1511,7 +1504,7 @@ static void * create_n (const void *aux1, void *aux2 UNUSED) { int v; - + const struct examine *examine = aux1; struct exploratory_stats *es = pool_calloc (examine->pool, examine->n_dep_vars, sizeof (*es)); struct subcase ordering; @@ -1541,12 +1534,31 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, const struct examine *examine = aux1; struct exploratory_stats *es = user_data; + bool this_case_is_missing = false; + /* LISTWISE missing must be dealt with here */ + if (!examine->missing_pw) + { + for (v = 0; v < examine->n_dep_vars; v++) + { + const struct variable *var = examine->dep_vars[v]; + + if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) + { + es[v].missing += weight; + this_case_is_missing = true; + } + } + } + + if (this_case_is_missing) + return; + for (v = 0; v < examine->n_dep_vars; v++) { struct ccase *outcase ; const struct variable *var = examine->dep_vars[v]; const double x = case_data (c, var)->f; - + if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) { es[v].missing += weight; @@ -1572,7 +1584,7 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, case_data_idx (c, examine->id_idx), examine->id_width); case_data_rw_idx (outcase, EX_WT)->f = weight; - + es[v].cc += weight; if (es[v].cmin > weight) @@ -1593,12 +1605,11 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) { int i; casenumber imin = 0; - double imax = es[v].cc; + casenumber imax; struct casereader *reader; struct ccase *c; - casenumber total_cases; - if (examine->histogram) + if (examine->histogramplot && es[v].non_missing > 0) { /* Sturges Rule */ double bin_width = fabs (es[v].minimum - es[v].maximum) @@ -1610,9 +1621,10 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) } es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer); - total_cases = casereader_count_cases (es[v].sorted_reader); es[v].sorted_writer = NULL; + imax = casereader_get_case_cnt (es[v].sorted_reader); + es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima)); es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima)); for (i = 0; i < examine->calc_extremes; ++i) @@ -1620,12 +1632,14 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ; value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ; } - + + bool warn = true; for (reader = casereader_clone (es[v].sorted_reader); (c = casereader_read (reader)) != NULL; case_unref (c)) { const double val = case_data_idx (c, EX_VAL)->f; - const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */ + double wt = case_data_idx (c, EX_WT)->f; + wt = var_force_valid_weight (examine->wv, wt, &warn); moments_pass_two (es[v].mom, val, wt); @@ -1641,19 +1655,19 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) min->val = val; value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width); } - imin += wt; + imin ++; } - imax -= wt; + imax --; if (imax < examine->calc_extremes) { int x; - for (x = imax; x < imax + wt; ++x) + for (x = imax; x < imax + 1; ++x) { struct extremity *max; - if (x >= examine->calc_extremes) + if (x >= examine->calc_extremes) break; max = &es[v].maxima[x]; @@ -1664,10 +1678,10 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) } casereader_destroy (reader); - if (examine->calc_extremes > 0) + if (examine->calc_extremes > 0 && es[v].non_missing > 0) { assert (es[v].minima[0].val == es[v].minimum); - assert (es[v].maxima[0].val == es[v].maximum); + assert (es[v].maxima[0].val == es[v].maximum); } { @@ -1708,7 +1722,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) { struct order_stats *os; - es[v].box_whisker = box_whisker_create (es[v].hinges, + es[v].box_whisker = box_whisker_create (es[v].hinges, EX_ID, examine->id_var); os = &es[v].box_whisker->parent; @@ -1723,7 +1737,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) struct order_stats *os; moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL); - + es[v].np = np_create (n, mean, var); os = &es[v].np->parent; @@ -1738,7 +1752,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) static void cleanup_exploratory_stats (struct examine *cmd) -{ +{ int i; for (i = 0; i < cmd->n_iacts; ++i) { @@ -1786,6 +1800,12 @@ cleanup_exploratory_stats (struct examine *cmd) statistic_destroy (&es[v].histogram->parent); moments_destroy (es[v].mom); + if (es[v].box_whisker) + { + stat = &es[v].box_whisker->parent.parent; + stat->destroy (stat); + } + casereader_destroy (es[v].sorted_reader); } } @@ -1805,36 +1825,25 @@ run_examine (struct examine *cmd, struct casereader *input) payload.update = update_n; payload.calculate = calculate_n; payload.destroy = NULL; - + cmd->wv = dict_get_weight (cmd->dict); cmd->cats - = categoricals_create (cmd->iacts, cmd->n_iacts, + = categoricals_create (cmd->iacts, cmd->n_iacts, cmd->wv, cmd->dep_excl, cmd->fctr_excl); categoricals_set_payload (cmd->cats, &payload, cmd, NULL); - if (cmd->id_idx == -1) + if (cmd->id_var == NULL) { struct ccase *c = casereader_peek (input, 0); - assert (cmd->id_var == NULL); - cmd->id_idx = case_get_value_cnt (c); input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); case_unref (c); } - /* Remove cases on a listwise basis if requested */ - if ( cmd->missing_pw == false) - input = casereader_create_filter_missing (input, - cmd->dep_vars, - cmd->n_dep_vars, - cmd->dep_excl, - NULL, - NULL); - for (reader = input; (c = casereader_read (reader)) != NULL; case_unref (c)) { @@ -1847,6 +1856,10 @@ run_examine (struct examine *cmd, struct casereader *input) { summary_report (cmd, i); + const size_t n_cats = categoricals_n_count (cmd->cats, i); + if (n_cats == 0) + continue; + if (cmd->disp_extremes > 0) extremes_report (cmd, i); @@ -1869,13 +1882,13 @@ run_examine (struct examine *cmd, struct casereader *input) } } - if (cmd->histogram) + if (cmd->histogramplot) show_histogram (cmd, i); if (cmd->npplot) show_npplot (cmd, i); - if (cmd->spreadlevel) + if (cmd->spreadlevelplot) show_spreadlevel (cmd, i); if (cmd->descriptives) @@ -1910,7 +1923,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.id_width = 0; examine.id_var = NULL; examine.boxplot_mode = BP_GROUPS; - + examine.ex_proto = caseproto_create (); examine.pool = pool_create (); @@ -1926,12 +1939,13 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.dep_excl = MV_ANY; examine.fctr_excl = MV_ANY; - examine.histogram = false; + examine.histogramplot = false; examine.npplot = false; examine.boxplot = false; - examine.spreadlevel = false; + examine.spreadlevelplot = false; examine.sl_power = 0; - + examine.dep_vars = NULL; + examine.n_dep_vars = 0; examine.dict = dataset_dict (ds); /* Accept an optional, completely pointless "/VARIABLES=" */ @@ -1956,11 +1970,11 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) if (iact) { examine.n_iacts++; - iacts_mem = + iacts_mem = pool_nrealloc (examine.pool, iacts_mem, examine.n_iacts, sizeof (*iacts_mem)); - + iacts_mem[examine.n_iacts - 1] = iact; } } @@ -1988,6 +2002,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) int extr = 5; if (lex_match (lexer, T_LPAREN)) { + if (!lex_force_int (lexer)) + goto error; extr = lex_integer (lexer); if (extr < 0) @@ -2025,7 +2041,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) while (lex_is_number (lexer)) { double p = lex_number (lexer); - + if ( p <= 0 || p >= 100.0) { lex_error (lexer, @@ -2164,13 +2180,13 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "HISTOGRAM")) { - examine.histogram = true; + examine.histogramplot = true; } else if (lex_match_id (lexer, "SPREADLEVEL")) { - examine.spreadlevel = true; + examine.spreadlevelplot = true; examine.sl_power = 0; - if (lex_match (lexer, T_LPAREN)) + if (lex_match (lexer, T_LPAREN) && lex_force_int (lexer)) { examine.sl_power = lex_integer (lexer); @@ -2181,29 +2197,29 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "NONE")) { - examine.histogram = false; + examine.histogramplot = false; examine.npplot = false; examine.boxplot = false; } else if (lex_match (lexer, T_ALL)) { - examine.histogram = true; + examine.histogramplot = true; examine.npplot = true; examine.boxplot = true; } - else + else { lex_error (lexer, NULL); goto error; } lex_match (lexer, T_COMMA); - } + } } else if (lex_match_id (lexer, "CINTERVAL")) { if ( !lex_force_num (lexer)) goto error; - + examine.conf = lex_number (lexer); lex_get (lexer); } @@ -2283,7 +2299,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) struct casegrouper *grouper; struct casereader *group; bool ok; - + grouper = casegrouper_create_splits (proc_open (ds), examine.dict); while (casegrouper_get_next_group (grouper, &group)) run_examine (&examine, group); @@ -2293,6 +2309,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) caseproto_unref (examine.ex_proto); + for (i = 0; i < examine.n_iacts; ++i) + interaction_destroy (examine.iacts[i]); free (examine.ptiles); free (examine.dep_vars); pool_destroy (examine.pool);