Added an n_missing parameter to casereader_create_filter_missing.
author John Darrington <john@marilyn.intra>
Thu, 4 Sep 2008 12:08:52 +0000 (20:08 +0800)
committer John Darrington <john@marilyn.intra>
Thu, 4 Sep 2008 12:08:52 +0000 (20:08 +0800)
Also update all callers.  Thanks to Ben for reviewing this patch.

src/data/casereader-filter.c
src/data/casereader.h
src/language/stats/chisquare.c
src/language/stats/glm.q
src/language/stats/npar-summary.c
src/language/stats/npar.q
src/language/stats/oneway.q
src/language/stats/rank.q
src/language/stats/regression.q
src/language/stats/t-test.q

index 37e1dc85c828f1908fef2454f3ecad872b9a6d68..b4275254fa24db01a182a66bf5000c48646b001e 100644 (file)
@@ -245,6 +245,7 @@ struct casereader_filter_missing
     struct variable **vars;     /* Variables whose values to filter. */
     size_t var_cnt;             /* Number of variables. */
     enum mv_class class;        /* Types of missing values to filter. */
+    casenumber *n_missing;
   };
 
 static bool casereader_filter_missing_include (const struct ccase *, void *);
@@ -264,6 +265,9 @@ static bool casereader_filter_missing_destroy (void *);
    read or, if that never occurs, until the filtering casereader
    is destroyed.
 
+   If N_MISSING is non-null, then after reading, it will be filled
+   with the total number of dropped cases.
+
    After this function is called, READER must not ever again
    be referenced directly.  It will be destroyed automatically
    when the filtering casereader is destroyed. */
@@ -271,6 +275,7 @@ struct casereader *
 casereader_create_filter_missing (struct casereader *reader,
                                   const struct variable **vars, size_t var_cnt,
                                   enum mv_class class,
+                                 casenumber *n_missing,
                                   struct casewriter *exclude)
 {
   if (var_cnt > 0 && class != MV_NEVER)
@@ -279,6 +284,8 @@ casereader_create_filter_missing (struct casereader *reader,
       cfm->vars = xmemdup (vars, sizeof *vars * var_cnt);
       cfm->var_cnt = var_cnt;
       cfm->class = class;
+      cfm->n_missing = n_missing;
+      if (n_missing) *n_missing = 0;
       return casereader_create_filter_func (reader,
                                             casereader_filter_missing_include,
                                             casereader_filter_missing_destroy,
@@ -302,7 +309,11 @@ casereader_filter_missing_include (const struct ccase *c, void *cfm_)
       struct variable *var = cfm->vars[i];
       const union value *value = case_data (c, var);
       if (var_is_value_missing (var, value, cfm->class))
-        return false;
+       {
+         if ( cfm->n_missing )
+           (*cfm->n_missing)++;
+         return false;
+       }
     }
   return true;
 }
index ba65cb18e7cc7f7d9f54dce259a44e9774e10542..ffbd17323ffc32274c69ce8c41f7598170a08d16 100644 (file)
@@ -98,6 +98,7 @@ struct casereader *
 casereader_create_filter_missing (struct casereader *,
                                   const struct variable **vars, size_t var_cnt,
                                   enum mv_class,
+                                 casenumber *n_missing,
                                   struct casewriter *exclude);
 
 struct casereader *
index 158064dcf920b6f539017c57d2bb9203e2c90457..1b77230642c6fca4ac6362bb872404615e92d4be 100644 (file)
@@ -344,7 +344,8 @@ chisquare_execute (const struct dataset *ds,
          struct hsh_table *freq_hash = NULL;
           struct casereader *reader =
             casereader_create_filter_missing (casereader_clone (input),
-                                              &ost->vars[v], 1, exclude, NULL);
+                                              &ost->vars[v], 1, exclude,
+                                             NULL, NULL);
          struct tab_table *freq_table =
             create_variable_frequency_table(dict, reader, cst, v, &freq_hash);
 
@@ -414,7 +415,8 @@ chisquare_execute (const struct dataset *ds,
          double total_obs = 0.0;
           struct casereader *reader =
             casereader_create_filter_missing (casereader_clone (input),
-                                              &ost->vars[v], 1, exclude, NULL);
+                                              &ost->vars[v], 1, exclude,
+                                             NULL, NULL);
          struct hsh_table *freq_hash =
            create_freq_hash_with_range (dict, reader,
                                          ost->vars[v], cst->lo, cst->hi);
index fd48b735e55b83a1f0c2908426ec7c4017306794..1b91ba8166c389bb0e40bb002ac9bc984b0364e9 100644 (file)
@@ -307,9 +307,9 @@ run_glm (struct casereader *input,
 
   reader = casereader_clone (input);
   reader = casereader_create_filter_missing (reader, indep_vars, n_indep,
-                                            MV_ANY, NULL);
+                                            MV_ANY, NULL, NULL);
   reader = casereader_create_filter_missing (reader, v_dependent, 1,
-                                            MV_ANY, NULL);
+                                            MV_ANY, NULL, NULL);
   n_data = data_pass_one (casereader_clone (reader),
                          (const struct variable **) all_vars, n_all_vars,
                          mom);
index c9c2c9da1a9d1a215b0152612fc46eb1240b34e7..c752d503bbbde6af9eeb0bd3036656e6de2c6019 100644 (file)
@@ -52,7 +52,7 @@ npar_summary_calc_descriptives (struct descriptives *desc,
       pass = casereader_clone (input);
       pass = casereader_create_filter_missing (pass,
                                                &v, 1,
-                                               filter, NULL);
+                                               filter, NULL, NULL);
       pass = casereader_create_filter_weight (pass, dict, NULL, NULL);
       while (casereader_read(pass, &c))
        {
index 688ce2379bc103eaed0ce6f1b230a951053f0743..647205fd690a26e5e2a73e8323be872b40efc269 100644 (file)
@@ -186,7 +186,8 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds)
     input = casereader_create_filter_missing (input,
                                               npar_specs.vv,
                                               npar_specs.n_vars,
-                                              npar_specs.filter, NULL);
+                                              npar_specs.filter,
+                                              NULL, NULL);
 
   grouper = casegrouper_create_splits (input, dataset_dict (ds));
   while (casegrouper_get_next_group (grouper, &group))
index 7da992274ea999c8ad488a877264e6dba0aa5327..0600fdf2d2e6c260e204a3bca158c29a9803c9b3 100644 (file)
@@ -931,10 +931,10 @@ run_oneway (struct cmd_oneway *cmd,
 
   exclude = cmd->incl != ONEWAY_INCLUDE ? MV_ANY : MV_SYSTEM;
   input = casereader_create_filter_missing (input, &indep_var, 1,
-                                            exclude, NULL);
+                                            exclude, NULL, NULL);
   if (cmd->miss == ONEWAY_LISTWISE)
     input = casereader_create_filter_missing (input, vars, n_vars,
-                                              exclude, NULL);
+                                              exclude, NULL, NULL);
   input = casereader_create_filter_weight (input, dict, NULL, NULL);
 
   reader = casereader_clone (input);
index cb63949076bb4b03d50d009b0805762a3d250dae..13facbdbbf59f85e43bd350578d4633cb07b880e 100644 (file)
@@ -486,7 +486,7 @@ rank_sorted_file (struct casereader *input,
 
 
   input = casereader_create_filter_missing (input, &rank_var, 1,
-                                            exclude_values, output);
+                                            exclude_values, NULL, output);
   input = casereader_create_filter_weight (input, dict, NULL, output);
 
   casereader_split (input, &pass1, &pass2);
index 1d31d1845e02e20064610bf3886cd8d11c71f5a0..0f8ae59433b615f3fb53fae51d04264bdbab88f1 100644 (file)
@@ -956,9 +956,9 @@ run_regression (struct casereader *input, struct cmd_regression *cmd,
       n_indep = identify_indep_vars (indep_vars, dep_var);
       reader = casereader_clone (input);
       reader = casereader_create_filter_missing (reader, indep_vars, n_indep,
-                                                MV_ANY, NULL);
+                                                MV_ANY, NULL, NULL);
       reader = casereader_create_filter_missing (reader, &dep_var, 1,
-                                                MV_ANY, NULL);
+                                                MV_ANY, NULL, NULL);
       n_data = prepare_categories (casereader_clone (reader),
                                   indep_vars, n_indep, mom);
 
index 5bb02e1767af92edd5267f4c2c9e42bbfcf9440a..c51d3bf25425763b084480f67e7303e3cb7b6477 100644 (file)
@@ -1815,7 +1815,7 @@ calculate (struct cmd_t_test *cmd,
     input = casereader_create_filter_missing (input,
                                               cmd->v_variables,
                                               cmd->n_variables,
-                                              exclude, NULL);
+                                              exclude, NULL, NULL);
 
   input = casereader_create_filter_weight (input, dict, NULL, NULL);