EXAMINE: Implement MISSING=REPORT option
author John Darrington <john@darrington.wattle.id.au>
Mon, 9 Apr 2012 10:51:05 +0000 (12:51 +0200)
committer John Darrington <john@darrington.wattle.id.au>
Mon, 9 Apr 2012 10:51:05 +0000 (12:51 +0200)
doc/statistics.texi
src/language/stats/examine.c
tests/language/stats/examine.at

index 0ae3d3786258ee0963728fb95b69d34ec52a0b15..4452c1a631766bb1fb4b8209605d943fc1242c3f 100644 (file)
@@ -408,7 +408,6 @@ If @subcmd{INCLUDE} is set, then user-missing values are included in the
 calculations, but system-missing values are not.
 If @subcmd{EXCLUDE} is set, which is the default, user-missing
 values are excluded as well as system-missing values. 
-This is the default.
 
 If @subcmd{LISTWISE} is set, then the entire case is excluded from analysis
 whenever any variable  specified in any @cmd{/VARIABLES} subcommand
index 37cf0c0e3f6e4464cebd08cefd5a70c220cd72eb..9bc287e510fcd36677a62defb77b0dde2ba167d7 100644 (file)
 #define _(msgid) gettext (msgid)
 #define N_(msgid) msgid
 
+static void  /* Append VAL's name for VAR to STR, tagging missing values. */
+append_value_name (const struct variable *var, const union value *val, struct string *str)
+{
+  var_append_value_name (var, val, str);  /* The value's name always comes first. */
+  if ( var_is_value_missing (var, val, MV_ANY))  /* VAL is missing for VAR under class MV_ANY. */
+    ds_put_cstr (str, _(" (missing)"));  /* Suffix passes through gettext via _(). */
+}
+
 enum bp_mode
   {
     BP_GROUPS,
@@ -96,7 +104,8 @@ struct examine
   size_t n_iacts;
   struct interaction **iacts;
 
-  enum mv_class exclude;
+  enum mv_class dep_excl;
+  enum mv_class fctr_excl;
 
   const struct dictionary *dict;
 
@@ -300,7 +309,7 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx)
               
               ds_put_cstr (&label, var_to_string (ivar));
               ds_put_cstr (&label, " = ");
-              var_append_value_name (ivar, val, &label);
+              append_value_name (ivar, val, &label);
               ds_put_cstr (&label, "; ");
             }
 
@@ -359,7 +368,7 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx)
               
               ds_put_cstr (&label, var_to_string (ivar));
               ds_put_cstr (&label, " = ");
-              var_append_value_name (ivar, val, &label);
+              append_value_name (ivar, val, &label);
               ds_put_cstr (&label, "; ");
             }
 
@@ -426,7 +435,7 @@ show_npplot (const struct examine *cmd, int iact_idx)
                   
                   ds_put_cstr (&label, var_to_string (ivar));
                   ds_put_cstr (&label, " = ");
-                  var_append_value_name (ivar, val, &label);
+                  append_value_name (ivar, val, &label);
                   ds_put_cstr (&label, "; ");
                   
                 }
@@ -551,7 +560,7 @@ show_histogram (const struct examine *cmd, int iact_idx)
                   
                   ds_put_cstr (&label, var_to_string (ivar));
                   ds_put_cstr (&label, " = ");
-                  var_append_value_name (ivar, val, &label);
+                  append_value_name (ivar, val, &label);
                   ds_put_cstr (&label, "; ");
                   
                 }
@@ -676,7 +685,7 @@ percentiles_report (const struct examine *cmd, int iact_idx)
                    {              
                      struct string str;
                      ds_init_empty (&str);
-                     var_append_value_name (ivar, val, &str);
+                     append_value_name (ivar, val, &str);
               
                      tab_text (t,
                                1 + ivar_idx,
@@ -844,7 +853,7 @@ descriptives_report (const struct examine *cmd, int iact_idx)
                 {              
                   struct string str;
                   ds_init_empty (&str);
-                  var_append_value_name (ivar, val, &str);
+                  append_value_name (ivar, val, &str);
               
                   tab_text (t,
                             1 + ivar_idx,
@@ -1160,7 +1169,7 @@ extremes_report (const struct examine *cmd, int iact_idx)
                 {              
                   struct string str;
                   ds_init_empty (&str);
-                  var_append_value_name (ivar, val, &str);
+                  append_value_name (ivar, val, &str);
               
                   tab_text (t,
                             1 + ivar_idx,
@@ -1378,7 +1387,7 @@ summary_report (const struct examine *cmd, int iact_idx)
                      {              
                        struct string str;
                        ds_init_empty (&str);
-                       var_append_value_name (ivar, val, &str);
+                       append_value_name (ivar, val, &str);
               
                        tab_text (t,
                                  1 + ivar_idx, heading_rows + n_cats * v + i,
@@ -1538,7 +1547,7 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data,
       const struct variable *var = examine->dep_vars[v];
       const double x = case_data (c, var)->f;
       
-      if (var_is_value_missing (var, case_data (c, var), examine->exclude))
+      if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
         {
           es[v].missing += weight;
           continue;
@@ -1802,7 +1811,7 @@ run_examine (struct examine *cmd, struct casereader *input)
 
   cmd->cats
     = categoricals_create (cmd->iacts, cmd->n_iacts,  
-                           cmd->wv, cmd->exclude, MV_ANY);
+                           cmd->wv, cmd->dep_excl, cmd->fctr_excl);
 
   categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
 
@@ -1818,14 +1827,12 @@ run_examine (struct examine *cmd, struct casereader *input)
       case_unref (c);
     }
 
-  /* FIXME: Filter out missing factor variables */
-
   /* Remove cases on a listwise basis if requested */
   if ( cmd->missing_pw == false)
     input = casereader_create_filter_missing (input,
                                               cmd->dep_vars,
                                               cmd->n_dep_vars,
-                                              cmd->exclude,
+                                              cmd->dep_excl,
                                               NULL,
                                               NULL);
 
@@ -1918,7 +1925,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds)
   examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *));
   examine.iacts[0] = interaction_create (NULL);
 
-  examine.exclude = MV_ANY;
+  examine.dep_excl = MV_ANY;
+  examine.fctr_excl = MV_ANY;
   examine.histogram = false;
   examine.npplot = false;
   examine.boxplot = false;
@@ -2102,11 +2110,19 @@ cmd_examine (struct lexer *lexer, struct dataset *ds)
                 }
               else if (lex_match_id (lexer, "EXCLUDE"))
                 {
-                  examine.exclude = MV_ANY;
+                  examine.dep_excl = MV_ANY;
                 }
               else if (lex_match_id (lexer, "INCLUDE"))
                 {
-                  examine.exclude = MV_SYSTEM;
+                  examine.dep_excl = MV_SYSTEM;
+                }
+              else if (lex_match_id (lexer, "REPORT"))
+                {
+                  examine.fctr_excl = MV_NEVER;
+                }
+              else if (lex_match_id (lexer, "NOREPORT"))
+                {
+                  examine.fctr_excl = MV_ANY;
                 }
               else
                 {
index f152f72899c7cdec3f6176b7006e470931528547..3663a1bb79c40e38ef8a0054b96dd96ae0e22e0f 100644 (file)
@@ -670,3 +670,129 @@ EXAMINE
 AT_CHECK([pspp -o pspp.csv examine-bad.sps], [1], [ignore])
 
 AT_CLEANUP 
+
+
+dnl Check the MISSING=REPORT option
+AT_SETUP([EXAMINE -- MISSING=REPORT])
+
+
+AT_DATA([examine-report.sps], [dnl
+set format = F22.0.
+data list list /x * g *.
+begin data.
+1   1
+2   1
+3   1
+4   1
+5   1
+6   1
+7   1
+8   1
+9   1
+10   2
+20   2
+30   2
+40   2
+50   2
+60   2
+70   2
+80   2
+90   2
+101   9
+201   9
+301   9
+401   9
+501   99
+601   99
+701   99
+801   99
+901   99
+1001  .
+2002  .
+3003  .
+4004  .
+end data.
+
+MISSING VALUES g (9, 99, 999).
+
+EXAMINE
+        /VARIABLES = x
+        BY  g
+        /STATISTICS = EXTREME
+        /NOTOTAL
+        /MISSING = REPORT.
+])
+
+
+AT_CHECK([pspp -O format=csv examine-report.sps], [0], [dnl
+Table: Reading free-form data from INLINE.
+Variable,Format
+x,F8.0
+g,F8.0
+
+Table: Case Processing Summary
+,,Cases,,,,,
+,,Valid,,Missing,,Total,
+,g,N,Percent,N,Percent,N,Percent
+x,. (missing),4,100%,0,0%,4,100%
+,1,9,100%,0,0%,9,100%
+,2,9,100%,0,0%,9,100%
+,9 (missing),4,100%,0,0%,4,100%
+,99 (missing),5,100%,0,0%,5,100%
+
+Table: Extreme Values
+,g,,,Case Number,Value
+x,. (missing),Highest,1,31,4004
+,,,2,30,3003
+,,,3,29,2002
+,,,4,28,1001
+,,,5,0,0
+,,Lowest,1,28,1001
+,,,2,29,2002
+,,,3,30,3003
+,,,4,31,4004
+,,,5,31,4004
+,1,Highest,1,9,9
+,,,2,8,8
+,,,3,7,7
+,,,4,6,6
+,,,5,5,5
+,,Lowest,1,1,1
+,,,2,2,2
+,,,3,3,3
+,,,4,4,4
+,,,5,5,5
+,2,Highest,1,18,90
+,,,2,17,80
+,,,3,16,70
+,,,4,15,60
+,,,5,14,50
+,,Lowest,1,10,10
+,,,2,11,20
+,,,3,12,30
+,,,4,13,40
+,,,5,14,50
+,9 (missing),Highest,1,22,401
+,,,2,21,301
+,,,3,20,201
+,,,4,19,101
+,,,5,0,0
+,,Lowest,1,19,101
+,,,2,20,201
+,,,3,21,301
+,,,4,22,401
+,,,5,22,401
+,99 (missing),Highest,1,27,901
+,,,2,26,801
+,,,3,25,701
+,,,4,24,601
+,,,5,23,501
+,,Lowest,1,23,501
+,,,2,24,601
+,,,3,25,701
+,,,4,26,801
+,,,5,27,901
+])
+
+
+AT_CLEANUP 
\ No newline at end of file