Fix bug #37984 - EXAMINE extremes vs. fractional weights.
authorJohn Darrington <john@darrington.wattle.id.au>
Tue, 1 Jan 2013 15:19:26 +0000 (16:19 +0100)
committerJohn Darrington <john@darrington.wattle.id.au>
Tue, 1 Jan 2013 15:19:26 +0000 (16:19 +0100)
There was a bug where extreme values were not calculated properly when
weights were fractional.  This change fixes this problem and adds a
test.

src/language/stats/examine.c
tests/language/stats/examine.at

index 5d308f11abe15d287bb2507318fadd41faaaf766..a20396397d74d87def5a0181dbc30502079b8ab6 100644 (file)
@@ -1574,7 +1574,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
     {
       int i;
       casenumber imin = 0;
-      double imax = es[v].cc;
+      casenumber imax;
       struct casereader *reader;
       struct ccase *c;
 
@@ -1592,6 +1592,8 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
       es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
       es[v].sorted_writer = NULL;
 
+      imax = casereader_get_case_cnt (es[v].sorted_reader);
+
       es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
       es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
       for (i = 0; i < examine->calc_extremes; ++i)
@@ -1604,7 +1606,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
            (c = casereader_read (reader)) != NULL; case_unref (c))
         {
           const double val = case_data_idx (c, EX_VAL)->f;
-          const double wt = case_data_idx (c, EX_WT)->f;   /* FIXME: What about fractional weights ??? */
+          const double wt = case_data_idx (c, EX_WT)->f;
 
           moments_pass_two (es[v].mom, val, wt);
 
@@ -1620,15 +1622,15 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
                   min->val = val;
                   value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
                 }
-              imin += wt;
+              imin ++;
             }
 
-          imax -= wt;
+          imax --;
           if (imax < examine->calc_extremes)
             {
               int x;
 
-              for (x = imax; x < imax + wt; ++x)
+              for (x = imax; x < imax + 1; ++x)
                 {
                   struct extremity *max;
 
@@ -1646,7 +1648,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
       if (examine->calc_extremes > 0)
         {
           assert (es[v].minima[0].val == es[v].minimum);
-          assert (es[v].maxima[0].val == es[v].maximum);
+         assert (es[v].maxima[0].val == es[v].maximum);
         }
 
       {
index 3663a1bb79c40e38ef8a0054b96dd96ae0e22e0f..0acbf5f33035030953ee7a1575ad3d58da8a435a 100644 (file)
@@ -227,6 +227,81 @@ V1,Highest,1,21,20.00
 ])
 AT_CLEANUP
 
+
+
+AT_SETUP([EXAMINE -- extremes with fractional weights])
+AT_DATA([extreme.sps], [dnl
+set format=F20.3.
+data list notable list /w * x *.
+begin data.
+ 0.88  300000
+ 0.86  320000
+ 0.98  480000
+ 0.93  960000
+ 1.35  960000
+ 1.31  960000
+ 0.88  960000
+ 0.88  1080000
+ 0.88  1080000
+ 0.95  1200000
+ 1.47  1200000
+ 0.93  1200000
+ 0.98  1320000
+ 1.31  1380000
+ 0.93  1440000
+ 0.88  1560000
+ 1.56  1560000
+ 1.47  1560000
+end data.
+
+weight by w.
+
+
+EXAMINE
+        x
+        /STATISTICS = DESCRIPTIVES EXTREME (5)
+        .
+])
+
+AT_CHECK([pspp -O format=csv  extreme.sps], [0], [dnl
+Table: Case Processing Summary
+,Cases,,,,,
+,Valid,,Missing,,Total,
+,N,Percent,N,Percent,N,Percent
+x,19.430,100%,.000,0%,19.430,100%
+
+Table: Extreme Values
+,,,Case Number,Value
+x,Highest,1,18,1560000.000
+,,2,17,1560000.000
+,,3,16,1560000.000
+,,4,15,1440000.000
+,,5,14,1380000.000
+,Lowest,1,1,300000.000
+,,2,2,320000.000
+,,3,3,480000.000
+,,4,4,960000.000
+,,5,5,960000.000
+
+Table: Descriptives
+,,,Statistic,Std. Error
+x,Mean,,1120010.293,86222.178
+,95% Confidence Interval for Mean,Lower Bound,939166.693,
+,,Upper Bound,1300853.894,
+,5% Trimmed Mean,,1141017.899,
+,Median,,1200000.000,
+,Variance,,144447748124.869,
+,Std. Deviation,,380062.821,
+,Minimum,,300000.000,
+,Maximum,,1560000.000,
+,Range,,1260000.000,
+,Interquartile Range,,467258.065,
+,Skewness,,-.887,.519
+,Kurtosis,,.340,1.005
+])
+
+AT_CLEANUP
+
 dnl Test the PERCENTILES subcommand of the EXAMINE command.
 dnl In particular test that it behaves properly when there are only 
 dnl a few cases.