From 085d4563d8d5adfb3a2552d6ab1959c3fcacaee0 Mon Sep 17 00:00:00 2001
From: John Darrington <john@marilyn.intra>
Date: Thu, 4 Sep 2008 20:08:52 +0800
Subject: [PATCH] Added an n_missing parameter to
 casereader_create_filter_missing.

Also update all callers.  Thanks to Ben for reviewing this patch.
---
 src/data/casereader-filter.c      | 13 ++++++++++++-
 src/data/casereader.h             |  1 +
 src/language/stats/chisquare.c    |  6 ++++--
 src/language/stats/glm.q          |  4 ++--
 src/language/stats/npar-summary.c |  2 +-
 src/language/stats/npar.q         |  3 ++-
 src/language/stats/oneway.q       |  4 ++--
 src/language/stats/rank.q         |  2 +-
 src/language/stats/regression.q   |  4 ++--
 src/language/stats/t-test.q       |  2 +-
 10 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/src/data/casereader-filter.c b/src/data/casereader-filter.c
index 37e1dc85..b4275254 100644
--- a/src/data/casereader-filter.c
+++ b/src/data/casereader-filter.c
@@ -245,6 +245,7 @@ struct casereader_filter_missing
     struct variable **vars;     /* Variables whose values to filter. */
     size_t var_cnt;             /* Number of variables. */
     enum mv_class class;        /* Types of missing values to filter. */
+    casenumber *n_missing;
   };
 
 static bool casereader_filter_missing_include (const struct ccase *, void *);
@@ -264,6 +265,9 @@ static bool casereader_filter_missing_destroy (void *);
    read or, if that never occurs, until the filtering casereader
    is destroyed.
 
+   If N_MISSING is non-null, *N_MISSING tallies the dropped cases as
+   they are read, so it must stay valid while the reader is in use.
+
    After this function is called, READER must not ever again
    be referenced directly.  It will be destroyed automatically
    when the filtering casereader is destroyed. */
@@ -271,6 +275,7 @@ struct casereader *
 casereader_create_filter_missing (struct casereader *reader,
                                   const struct variable **vars, size_t var_cnt,
                                   enum mv_class class,
+				  casenumber *n_missing,
                                   struct casewriter *exclude)
 {
   if (var_cnt > 0 && class != MV_NEVER)
@@ -279,6 +284,8 @@ casereader_create_filter_missing (struct casereader *reader,
       cfm->vars = xmemdup (vars, sizeof *vars * var_cnt);
       cfm->var_cnt = var_cnt;
       cfm->class = class;
+      cfm->n_missing = n_missing;
+      if (n_missing) *n_missing = 0;
       return casereader_create_filter_func (reader,
                                             casereader_filter_missing_include,
                                             casereader_filter_missing_destroy,
@@ -302,7 +309,11 @@ casereader_filter_missing_include (const struct ccase *c, void *cfm_)
       struct variable *var = cfm->vars[i];
       const union value *value = case_data (c, var);
       if (var_is_value_missing (var, value, cfm->class))
-        return false;
+	{
+	  if ( cfm->n_missing )
+	    (*cfm->n_missing)++;
+	  return false;
+	}
     }
   return true;
 }
diff --git a/src/data/casereader.h b/src/data/casereader.h
index ba65cb18..ffbd1732 100644
--- a/src/data/casereader.h
+++ b/src/data/casereader.h
@@ -98,6 +98,7 @@ struct casereader *
 casereader_create_filter_missing (struct casereader *,
                                   const struct variable **vars, size_t var_cnt,
                                   enum mv_class,
+				  casenumber *n_missing,
                                   struct casewriter *exclude);
 
 struct casereader *
diff --git a/src/language/stats/chisquare.c b/src/language/stats/chisquare.c
index 158064dc..1b772306 100644
--- a/src/language/stats/chisquare.c
+++ b/src/language/stats/chisquare.c
@@ -344,7 +344,8 @@ chisquare_execute (const struct dataset *ds,
 	  struct hsh_table *freq_hash = NULL;
           struct casereader *reader =
             casereader_create_filter_missing (casereader_clone (input),
-                                              &ost->vars[v], 1, exclude, NULL);
+                                              &ost->vars[v], 1, exclude,
+					      NULL, NULL);
 	  struct tab_table *freq_table =
             create_variable_frequency_table(dict, reader, cst, v, &freq_hash);
 
@@ -414,7 +415,8 @@ chisquare_execute (const struct dataset *ds,
 	  double total_obs = 0.0;
           struct casereader *reader =
             casereader_create_filter_missing (casereader_clone (input),
-                                              &ost->vars[v], 1, exclude, NULL);
+                                              &ost->vars[v], 1, exclude,
+					      NULL, NULL);
 	  struct hsh_table *freq_hash =
 	    create_freq_hash_with_range (dict, reader,
                                          ost->vars[v], cst->lo, cst->hi);
diff --git a/src/language/stats/glm.q b/src/language/stats/glm.q
index fd48b735..1b91ba81 100644
--- a/src/language/stats/glm.q
+++ b/src/language/stats/glm.q
@@ -307,9 +307,9 @@ run_glm (struct casereader *input,
 
   reader = casereader_clone (input);
   reader = casereader_create_filter_missing (reader, indep_vars, n_indep,
-					     MV_ANY, NULL);
+					     MV_ANY, NULL, NULL);
   reader = casereader_create_filter_missing (reader, v_dependent, 1,
-					     MV_ANY, NULL);
+					     MV_ANY, NULL, NULL);
   n_data = data_pass_one (casereader_clone (reader),
 			  (const struct variable **) all_vars, n_all_vars,
 			  mom);
diff --git a/src/language/stats/npar-summary.c b/src/language/stats/npar-summary.c
index c9c2c9da..c752d503 100644
--- a/src/language/stats/npar-summary.c
+++ b/src/language/stats/npar-summary.c
@@ -52,7 +52,7 @@ npar_summary_calc_descriptives (struct descriptives *desc,
       pass = casereader_clone (input);
       pass = casereader_create_filter_missing (pass,
                                                &v, 1,
-                                               filter, NULL);
+                                               filter, NULL, NULL);
       pass = casereader_create_filter_weight (pass, dict, NULL, NULL);
       while (casereader_read(pass, &c))
 	{
diff --git a/src/language/stats/npar.q b/src/language/stats/npar.q
index 688ce237..647205fd 100644
--- a/src/language/stats/npar.q
+++ b/src/language/stats/npar.q
@@ -186,7 +186,8 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds)
     input = casereader_create_filter_missing (input,
                                               npar_specs.vv,
                                               npar_specs.n_vars,
-                                              npar_specs.filter, NULL);
+                                              npar_specs.filter,
+					       NULL, NULL);
 
   grouper = casegrouper_create_splits (input, dataset_dict (ds));
   while (casegrouper_get_next_group (grouper, &group))
diff --git a/src/language/stats/oneway.q b/src/language/stats/oneway.q
index 7da99227..0600fdf2 100644
--- a/src/language/stats/oneway.q
+++ b/src/language/stats/oneway.q
@@ -931,10 +931,10 @@ run_oneway (struct cmd_oneway *cmd,
 
   exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM;
   input = casereader_create_filter_missing (input, &indep_var, 1,
-                                            exclude, NULL);
+                                            exclude, NULL, NULL);
   if (cmd->miss == ONEWAY_LISTWISE)
     input = casereader_create_filter_missing (input, vars, n_vars,
-                                              exclude, NULL);
+                                              exclude, NULL, NULL);
   input = casereader_create_filter_weight (input, dict, NULL, NULL);
 
   reader = casereader_clone (input);
diff --git a/src/language/stats/rank.q b/src/language/stats/rank.q
index cb639490..13facbdb 100644
--- a/src/language/stats/rank.q
+++ b/src/language/stats/rank.q
@@ -486,7 +486,7 @@ rank_sorted_file (struct casereader *input,
 
 
   input = casereader_create_filter_missing (input, &rank_var, 1,
-                                            exclude_values, output);
+                                            exclude_values, NULL, output);
   input = casereader_create_filter_weight (input, dict, NULL, output);
 
   casereader_split (input, &pass1, &pass2);
diff --git a/src/language/stats/regression.q b/src/language/stats/regression.q
index 1d31d184..0f8ae594 100644
--- a/src/language/stats/regression.q
+++ b/src/language/stats/regression.q
@@ -956,9 +956,9 @@ run_regression (struct casereader *input, struct cmd_regression *cmd,
       n_indep = identify_indep_vars (indep_vars, dep_var);
       reader = casereader_clone (input);
       reader = casereader_create_filter_missing (reader, indep_vars, n_indep,
-						 MV_ANY, NULL);
+						 MV_ANY, NULL, NULL);
       reader = casereader_create_filter_missing (reader, &dep_var, 1,
-						 MV_ANY, NULL);
+						 MV_ANY, NULL, NULL);
       n_data = prepare_categories (casereader_clone (reader),
 				   indep_vars, n_indep, mom);
 
diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q
index 5bb02e17..c51d3bf2 100644
--- a/src/language/stats/t-test.q
+++ b/src/language/stats/t-test.q
@@ -1815,7 +1815,7 @@ calculate (struct cmd_t_test *cmd,
     input = casereader_create_filter_missing (input,
                                               cmd->v_variables,
                                               cmd->n_variables,
-                                              exclude, NULL);
+                                              exclude, NULL, NULL);
 
   input = casereader_create_filter_weight (input, dict, NULL, NULL);
 
-- 
2.30.2