Use the sum of weights of only those cases which are valid to
author John Darrington <john@darrington.wattle.id.au>
Tue, 10 Feb 2009 00:55:58 +0000 (09:55 +0900)
committer John Darrington <john@darrington.wattle.id.au>
Tue, 10 Feb 2009 00:55:58 +0000 (09:55 +0900)
calculate percentiles.

Fixes bug #25522

src/language/stats/examine.q
tests/bugs/examine-missing2.sh

index b564ba4415991aa1593f828fcdfc197635e6437d..78ea0e63c0b1989f3f35a0a1e1105e5bb269f76d 100644 (file)
@@ -122,6 +122,9 @@ struct factor_metrics
   /* Sum of all weights, including those for missing values */
   double n;
 
+  /* Sum of weights of non-missing values only */
+  double n_valid;
+
   double mean;
 
   double variance;
@@ -934,23 +937,27 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level,
              case_data_idx (c, casereader_get_value_cnt (reader) - 1)->f;
 
          const double weight = wv ? case_data (c, wv)->f : 1.0;
+         const union value *value = case_data (c, dependent_vars[v]);
 
          if (weight != SYSMIS)
            minimize (&result->metrics[v].cmin, weight);
 
          moments1_add (result->metrics[v].moments,
-                       case_data (c, dependent_vars[v])->f,
+                       value->f,
                        weight);
 
          result->metrics[v].n += weight;
 
+         if ( ! var_is_value_missing (dependent_vars[v], value, MV_ANY) )
+           result->metrics[v].n_valid += weight;
+
          extrema_add (result->metrics[v].maxima,
-                      case_data (c, dependent_vars[v])->f,
+                      value->f,
                       weight,
                       loc);
 
          extrema_add (result->metrics[v].minima,
-                      case_data (c, dependent_vars[v])->f,
+                      value->f,
                       weight,
                       loc);
 
@@ -985,7 +992,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level,
          for (i = 0 ; i < metric->n_ptiles; ++i)
            {
              metric->ptl[i] = (struct percentile *)
-               percentile_create (percentile_list.data[i] / 100.0, metric->n);
+               percentile_create (percentile_list.data[i] / 100.0, metric->n_valid);
 
              if ( percentile_list.data[i] == 25)
                metric->quartiles[0] = metric->ptl[i];
index 97b192625f60b6d4752c7a1109f89fcb22705213..2cd6c14a6a00d930722149a77d5fc5da0d86fe7e 100755 (executable)
@@ -59,7 +59,7 @@ mkdir -p $TEMPDIR
 
 cd $TEMPDIR
 
-activity="create program"
+activity="create program 1"
 cat > $TESTFILE << EOF
 DATA LIST LIST /x * y *.
 BEGIN DATA.
@@ -77,7 +77,7 @@ EOF
 if [ $? -ne 0 ] ; then no_result ; fi
 
 
-activity="run program"
+activity="run program 1"
 $SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE
 if [ $? -ne 0 ] ; then no_result ; fi
 
@@ -115,4 +115,31 @@ diff -b  $TEMPDIR/pspp.list - <<EOF
 EOF
 if [ $? -ne 0 ] ; then fail ; fi
 
+
+# Make sure that missing values do not break the percentile calculation.
+
+activity="create program 2"
+cat > $TESTFILE << EOF
+DATA LIST LIST /X *.
+BEGIN DATA.
+99
+99
+5.00
+END DATA.
+
+MISSING VALUE X (99).
+
+EXAMINE /x
+        /PERCENTILES=HAVERAGE.
+
+
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+
+activity="run program 2"
+$SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE
+if [ $? -ne 0 ] ; then fail ; fi
+
+
 pass;