1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include <libpspp/alloc.h>
23 #include <libpspp/misc.h>
24 #include <data/value.h>
27 #define _(msgid) gettext (msgid)
29 /* Calculates variance, skewness, and kurtosis into *VARIANCE,
30 *SKEWNESS, and *KURTOSIS if they are non-null and not greater
31 moments than MAX_MOMENT. Accepts W as the total weight, D1 as
32 the total deviation from the estimated mean, and D2, D3, and
33 D4 as the sum of the squares, cubes, and 4th powers,
34 respectively, of the deviation from the estimated mean. */
36 calc_moments (enum moment max_moment,
37 double w, double d1, double d2, double d3, double d4,
38 double *variance, double *skewness, double *kurtosis)
42 if (max_moment >= MOMENT_VARIANCE && w > 1.)
44 double s2 = (d2 - pow2 (d1) / w) / (w - 1.);
48 /* From _SPSS Statistical Algorithms, 2nd ed.,
49 0-918469-89-9, section "DESCRIPTIVES". */
50 if (fabs (*variance) >= 1e-20)
52 if (max_moment >= MOMENT_SKEWNESS && skewness != NULL && w > 2.)
54 double s3 = s2 * sqrt (s2);
55 double g1 = (w * d3) / ((w - 1.0) * (w - 2.0) * s3);
59 if (max_moment >= MOMENT_KURTOSIS && kurtosis != NULL && w > 3.)
61 double den = (w - 2.) * (w - 3.) * pow2 (s2);
62 double g2 = (w * (w + 1) * d4 / (w - 1.) / den
63 - 3. * pow2 (d2) / den);
71 /* Two-pass moments. */
73 /* A set of two-pass moments. */
76 enum moment max_moment; /* Highest-order moment we're computing. */
77 int pass; /* Current pass (1 or 2). */
80 double w1; /* Total weight for pass 1, so far. */
81 double sum; /* Sum of values so far. */
82 double mean; /* Mean = sum / w1. */
85 double w2; /* Total weight for pass 2, so far. */
86 double d1; /* Sum of deviations from the mean. */
87 double d2; /* Sum of squared deviations from the mean. */
88 double d3; /* Sum of cubed deviations from the mean. */
89 double d4; /* Sum of (deviations from the mean)**4. */
92 /* Initializes moments M for calculating moment MAX_MOMENT and
95 init_moments (struct moments *m, enum moment max_moment)
98 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
99 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
100 m->max_moment = max_moment;
104 /* Clears out a set of moments so that it can be reused for a new
105 set of values. The moments to be calculated are not changed. */
107 moments_clear (struct moments *m)
114 /* Creates and returns a data structure for calculating moment
115 MAX_MOMENT and lower moments on a data series. The user
116 should call moments_pass_one() for each value in the series,
117 then call moments_pass_two() for the same set of values in the
118 same order, then call moments_calculate() to obtain the
119 moments. The user may ask for the mean at any time during the
120 first pass (using moments_calculate()), but otherwise no
121 statistics may be requested until the end of the second pass.
122 Call moments_destroy() when the moments are no longer
125 moments_create (enum moment max_moment)
127 struct moments *m = xmalloc (sizeof *m);
128 init_moments (m, max_moment);
132 /* Adds VALUE with the given WEIGHT to the calculation of
133 moments for the first pass. */
135 moments_pass_one (struct moments *m, double value, double weight)
138 assert (m->pass == 1);
140 if (value != SYSMIS && weight > 0.)
142 m->sum += value * weight;
147 /* Adds VALUE with the given WEIGHT to the calculation of
148 moments for the second pass. */
150 moments_pass_two (struct moments *m, double value, double weight)
157 m->mean = m->w1 != 0. ? m->sum / m->w1 : 0.;
158 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
161 if (value != SYSMIS && weight >= 0.)
163 double d = value - m->mean;
164 double d1_delta = d * weight;
166 if (m->max_moment >= MOMENT_VARIANCE)
168 double d2_delta = d1_delta * d;
170 if (m->max_moment >= MOMENT_SKEWNESS)
172 double d3_delta = d2_delta * d;
174 if (m->max_moment >= MOMENT_KURTOSIS)
176 double d4_delta = d3_delta * d;
185 /* Calculates moments based on the input data. Stores the total
186 weight in *WEIGHT, the mean in *MEAN, the variance in
187 *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
188 *KURTOSIS. Any of these result parameters may be null
189 pointers, in which case the values are not calculated. If any
190 result cannot be calculated, either because they are undefined
191 based on the input data or because their moments are higher
192 than the maximum requested on moments_create(), then SYSMIS is
193 stored into that result. */
195 moments_calculate (const struct moments *m,
197 double *mean, double *variance,
198 double *skewness, double *kurtosis)
204 if (variance != NULL)
206 if (skewness != NULL)
208 if (kurtosis != NULL)
214 /* How many passes so far? */
217 /* In the first pass we can only calculate the mean. */
218 if (mean != NULL && m->w1 > 0.)
219 *mean = m->sum / m->w1;
223 /* After the second pass we can calculate any stat. We
224 don't support "online" computation during the second
225 pass, so As a simple self-check, the total weight for
226 the passes must agree. */
227 assert (m->pass == 2);
228 assert (m->w1 == m->w2);
234 calc_moments (m->max_moment,
235 m->w2, m->d1, m->d2, m->d3, m->d4,
236 variance, skewness, kurtosis);
241 /* Destroys a set of moments. */
243 moments_destroy (struct moments *m)
248 /* Calculates the requested moments on the CNT values in ARRAY.
249 Each value is given a weight of 1. The total weight is stored
250 into *WEIGHT (trivially) and the mean, variance, skewness, and
251 kurtosis are stored into *MEAN, *VARIANCE, *SKEWNESS, and
252 *KURTOSIS, respectively. Any of the result pointers may be
253 null, in which case no value is stored. */
255 moments_of_doubles (const double *array, size_t cnt,
257 double *mean, double *variance,
258 double *skewness, double *kurtosis)
260 enum moment max_moment;
264 if (kurtosis != NULL)
265 max_moment = MOMENT_KURTOSIS;
266 else if (skewness != NULL)
267 max_moment = MOMENT_SKEWNESS;
268 else if (variance != NULL)
269 max_moment = MOMENT_VARIANCE;
271 max_moment = MOMENT_MEAN;
273 init_moments (&m, max_moment);
274 for (idx = 0; idx < cnt; idx++)
275 moments_pass_one (&m, array[idx], 1.);
276 for (idx = 0; idx < cnt; idx++)
277 moments_pass_two (&m, array[idx], 1.);
278 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
281 /* Calculates the requested moments on the CNT numeric values in
282 ARRAY. Each value is given a weight of 1. The total weight
283 is stored into *WEIGHT (trivially) and the mean, variance,
284 skewness, and kurtosis are stored into *MEAN, *VARIANCE,
285 *SKEWNESS, and *KURTOSIS, respectively. Any of the result
286 pointers may be null, in which case no value is stored. */
288 moments_of_values (const union value *array, size_t cnt,
290 double *mean, double *variance,
291 double *skewness, double *kurtosis)
293 enum moment max_moment;
297 if (kurtosis != NULL)
298 max_moment = MOMENT_KURTOSIS;
299 else if (skewness != NULL)
300 max_moment = MOMENT_SKEWNESS;
301 else if (variance != NULL)
302 max_moment = MOMENT_VARIANCE;
304 max_moment = MOMENT_MEAN;
306 init_moments (&m, max_moment);
307 for (idx = 0; idx < cnt; idx++)
308 moments_pass_one (&m, array[idx].f, 1.);
309 for (idx = 0; idx < cnt; idx++)
310 moments_pass_two (&m, array[idx].f, 1.);
311 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
314 /* One-pass moments. */
316 /* A set of one-pass moments. */
319 enum moment max_moment; /* Highest-order moment we're computing. */
320 double w; /* Total weight so far. */
321 double d1; /* Sum of deviations from the mean. */
322 double d2; /* Sum of squared deviations from the mean. */
323 double d3; /* Sum of cubed deviations from the mean. */
324 double d4; /* Sum of (deviations from the mean)**4. */
327 /* Initializes one-pass moments M for calculating moment
328 MAX_MOMENT and lower moments. */
330 init_moments1 (struct moments1 *m, enum moment max_moment)
333 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
334 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
335 m->max_moment = max_moment;
339 /* Clears out a set of one-pass moments so that it can be reused
340 for a new set of values. The moments to be calculated are not
343 moments1_clear (struct moments1 *m)
346 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
349 /* Creates and returns a data structure for calculating moment
350 MAX_MOMENT and lower moments on a data series in a single
351 pass. The user should call moments1_add() for each value in
352 the series. The user may call moments1_calculate() to obtain
353 the current moments at any time. Call moments1_destroy() when
354 the moments are no longer needed.
356 One-pass moments should only be used when two passes over the
357 data are impractical. */
359 moments1_create (enum moment max_moment)
361 struct moments1 *m = xmalloc (sizeof *m);
362 init_moments1 (m, max_moment);
366 /* Adds VALUE with the given WEIGHT to the calculation of
369 moments1_add (struct moments1 *m, double value, double weight)
373 if (value != SYSMIS && weight > 0.)
379 v1 = (weight / m->w) * (value - m->d1);
382 if (m->max_moment >= MOMENT_VARIANCE)
385 double w_prev_w = m->w * prev_w;
386 double prev_m2 = m->d2;
388 m->d2 += w_prev_w / weight * v2;
389 if (m->max_moment >= MOMENT_SKEWNESS)
391 double w2 = weight * weight;
393 double prev_m3 = m->d3;
395 m->d3 += (-3. * v1 * prev_m2
396 + w_prev_w / w2 * (m->w - 2. * weight) * v3);
397 if (m->max_moment >= MOMENT_KURTOSIS)
399 double w3 = w2 * weight;
402 m->d4 += (-4. * v1 * prev_m3
404 + ((pow2 (m->w) - 3. * weight * prev_w)
405 * v4 * w_prev_w / w3));
412 /* Calculates one-pass moments based on the input data. Stores
413 the total weight in *WEIGHT, the mean in *MEAN, the variance
414 in *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
415 *KURTOSIS. Any of these result parameters may be null
416 pointers, in which case the values are not calculated. If any
417 result cannot be calculated, either because they are undefined
418 based on the input data or because their moments are higher
419 than the maximum requested on moments_create(), then SYSMIS is
420 stored into that result. */
422 moments1_calculate (const struct moments1 *m,
424 double *mean, double *variance,
425 double *skewness, double *kurtosis)
431 if (variance != NULL)
433 if (skewness != NULL)
435 if (kurtosis != NULL)
446 calc_moments (m->max_moment,
447 m->w, 0., m->d2, m->d3, m->d4,
448 variance, skewness, kurtosis);
452 /* Destroy one-pass moments M. */
454 moments1_destroy (struct moments1 *m)
459 /* Returns the standard error of the skewness for the given total
462 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
463 section "DESCRIPTIVES". */
465 calc_seskew (double W)
467 return sqrt ((6. * W * (W - 1.)) / ((W - 2.) * (W + 1.) * (W + 3.)));
470 /* Returns the standard error of the kurtosis for the given total
473 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
474 section "DESCRIPTIVES", except that the sqrt symbol is omitted
477 calc_sekurt (double W)
479 return sqrt ((4. * (pow2 (W) - 1.) * pow2 (calc_seskew (W)))
480 / ((W - 3.) * (W + 5.)));