From 1c817faf0b4f8f7e53d032c805f775e239c6a9f2 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Mon, 9 Apr 2012 12:51:05 +0200 Subject: [PATCH] EXAMINE: Implement MISSING=REPORT option --- doc/statistics.texi | 1 - src/language/stats/examine.c | 50 ++++++++----- tests/language/stats/examine.at | 126 ++++++++++++++++++++++++++++++++ 3 files changed, 159 insertions(+), 18 deletions(-) diff --git a/doc/statistics.texi b/doc/statistics.texi index 0ae3d37862..4452c1a631 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -408,7 +408,6 @@ If @subcmd{INCLUDE} is set, then user-missing values are included in the calculations, but system-missing values are not. If @subcmd{EXCLUDE} is set, which is the default, user-missing values are excluded as well as system-missing values. -This is the default. If @subcmd{LISTWISE} is set, then the entire case is excluded from analysis whenever any variable specified in any @cmd{/VARIABLES} subcommand diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 37cf0c0e3f..9bc287e510 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -66,6 +66,14 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid +static void +append_value_name (const struct variable *var, const union value *val, struct string *str) +{ + var_append_value_name (var, val, str); + if ( var_is_value_missing (var, val, MV_ANY)) + ds_put_cstr (str, _(" (missing)")); +} + enum bp_mode { BP_GROUPS, @@ -96,7 +104,8 @@ struct examine size_t n_iacts; struct interaction **iacts; - enum mv_class exclude; + enum mv_class dep_excl; + enum mv_class fctr_excl; const struct dictionary *dict; @@ -300,7 +309,7 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -359,7 +368,7 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -426,7 +435,7 @@ show_npplot (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -551,7 +560,7 @@ show_histogram (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -676,7 +685,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, @@ -844,7 +853,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, @@ -1160,7 +1169,7 @@ extremes_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, @@ -1378,7 +1387,7 @@ summary_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, heading_rows + n_cats * v + i, @@ -1538,7 +1547,7 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, const struct variable *var = examine->dep_vars[v]; const double x = case_data (c, var)->f; - if (var_is_value_missing (var, case_data (c, var), examine->exclude)) + if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) { es[v].missing += weight; continue; @@ -1802,7 +1811,7 @@ run_examine (struct examine *cmd, struct casereader *input) cmd->cats = categoricals_create (cmd->iacts, cmd->n_iacts, - cmd->wv, cmd->exclude, MV_ANY); + cmd->wv, cmd->dep_excl, cmd->fctr_excl); categoricals_set_payload (cmd->cats, &payload, cmd, NULL); @@ -1818,14 +1827,12 @@ run_examine (struct examine *cmd, struct casereader *input) case_unref (c); } - /* FIXME: Filter out missing factor variables */ - /* Remove cases on a listwise basis if requested */ if ( cmd->missing_pw == false) input = casereader_create_filter_missing (input, cmd->dep_vars, cmd->n_dep_vars, - cmd->exclude, + cmd->dep_excl, NULL, NULL); @@ -1918,7 +1925,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *)); examine.iacts[0] = interaction_create (NULL); - examine.exclude = MV_ANY; + examine.dep_excl = MV_ANY; + examine.fctr_excl = MV_ANY; examine.histogram = false; examine.npplot = false; examine.boxplot = false; @@ -2102,11 +2110,19 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "EXCLUDE")) { - examine.exclude = MV_ANY; + examine.dep_excl = MV_ANY; } else if (lex_match_id (lexer, "INCLUDE")) { - examine.exclude = MV_SYSTEM; + examine.dep_excl = MV_SYSTEM; + } + else if (lex_match_id (lexer, "REPORT")) + { + examine.fctr_excl = MV_NEVER; + } + else if (lex_match_id (lexer, "NOREPORT")) + { + examine.fctr_excl = MV_ANY; } else { diff --git a/tests/language/stats/examine.at b/tests/language/stats/examine.at index f152f72899..3663a1bb79 100644 --- a/tests/language/stats/examine.at +++ b/tests/language/stats/examine.at @@ -670,3 +670,129 @@ EXAMINE AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore]) AT_CLEANUP + + +dnl Check the MISSING=REPORT option +AT_SETUP([EXAMINE -- MISSING=REPORT]) + + +AT_DATA([examine-report.sps], [dnl +set format = F22.0. +data list list /x * g *. +begin data. +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 +10 2 +20 2 +30 2 +40 2 +50 2 +60 2 +70 2 +80 2 +90 2 +101 9 +201 9 +301 9 +401 9 +501 99 +601 99 +701 99 +801 99 +901 99 +1001 . +2002 . +3003 . +4004 . +end data. + +MISSING VALUES g (9, 99, 999). + +EXAMINE + /VARIABLES = x + BY g + /STATISTICS = EXTREME + /NOTOTAL + /MISSING = REPORT. +]) + + +AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl +Table: Reading free-form data from INLINE. +Variable,Format +x,F8.0 +g,F8.0 + +Table: Case Processing Summary +,,Cases,,,,, +,,Valid,,Missing,,Total, +,g,N,Percent,N,Percent,N,Percent +x,. (missing),4,100%,0,0%,4,100% +,1,9,100%,0,0%,9,100% +,2,9,100%,0,0%,9,100% +,9 (missing),4,100%,0,0%,4,100% +,99 (missing),5,100%,0,0%,5,100% + +Table: Extreme Values +,g,,,Case Number,Value +x,. (missing),Highest,1,31,4004 +,,,2,30,3003 +,,,3,29,2002 +,,,4,28,1001 +,,,5,0,0 +,,Lowest,1,28,1001 +,,,2,29,2002 +,,,3,30,3003 +,,,4,31,4004 +,,,5,31,4004 +,1,Highest,1,9,9 +,,,2,8,8 +,,,3,7,7 +,,,4,6,6 +,,,5,5,5 +,,Lowest,1,1,1 +,,,2,2,2 +,,,3,3,3 +,,,4,4,4 +,,,5,5,5 +,2,Highest,1,18,90 +,,,2,17,80 +,,,3,16,70 +,,,4,15,60 +,,,5,14,50 +,,Lowest,1,10,10 +,,,2,11,20 +,,,3,12,30 +,,,4,13,40 +,,,5,14,50 +,9 (missing),Highest,1,22,401 +,,,2,21,301 +,,,3,20,201 +,,,4,19,101 +,,,5,0,0 +,,Lowest,1,19,101 +,,,2,20,201 +,,,3,21,301 +,,,4,22,401 +,,,5,22,401 +,99 (missing),Highest,1,27,901 +,,,2,26,801 +,,,3,25,701 +,,,4,24,601 +,,,5,23,501 +,,Lowest,1,23,501 +,,,2,24,601 +,,,3,25,701 +,,,4,26,801 +,,,5,27,901 +]) + + +AT_CLEANUP \ No newline at end of file -- 2.30.2