1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/moments.h"
25 #include "data/val-type.h"
26 #include "data/value.h"
27 #include "libpspp/misc.h"
29 #include "gl/xalloc.h"
32 #define _(msgid) gettext (msgid)
34 /* Calculates variance, skewness, and kurtosis into *VARIANCE,
35 *SKEWNESS, and *KURTOSIS if they are non-null and not greater
36 moments than MAX_MOMENT. Accepts W as the total weight, D1 as
37 the total deviation from the estimated mean, and D2, D3, and
38 D4 as the sum of the squares, cubes, and 4th powers,
39 respectively, of the deviation from the estimated mean. */
41 calc_moments (enum moment max_moment,
42 double w, double d1, double d2, double d3, double d4,
43 double *variance, double *skewness, double *kurtosis)
47 if (max_moment >= MOMENT_VARIANCE && w > 1.)
49 double s2 = (d2 - pow2 (d1) / w) / (w - 1.);
53 /* From _SPSS Statistical Algorithms, 2nd ed.,
54 0-918469-89-9, section "DESCRIPTIVES". */
55 if (fabs (s2) >= 1e-20)
57 if (max_moment >= MOMENT_SKEWNESS && skewness != NULL && w > 2.)
59 double s3 = s2 * sqrt (s2);
60 double g1 = (w * d3) / ((w - 1.0) * (w - 2.0) * s3);
64 if (max_moment >= MOMENT_KURTOSIS && kurtosis != NULL && w > 3.)
66 double den = (w - 2.) * (w - 3.) * pow2 (s2);
67 double g2 = (w * (w + 1) * d4 / (w - 1.) / den
68 - 3. * pow2 (d2) / den);
76 /* Two-pass moments. */
78 /* A set of two-pass moments. */
81 enum moment max_moment; /* Highest-order moment we're computing. */
82 int pass; /* Current pass (1 or 2). */
85 double w1; /* Total weight for pass 1, so far. */
86 double sum; /* Sum of values so far. */
87 double mean; /* Mean = sum / w1. */
90 double w2; /* Total weight for pass 2, so far. */
91 double d1; /* Sum of deviations from the mean. */
92 double d2; /* Sum of squared deviations from the mean. */
93 double d3; /* Sum of cubed deviations from the mean. */
94 double d4; /* Sum of (deviations from the mean)**4. */
97 /* Initializes moments M for calculating moment MAX_MOMENT and
100 init_moments (struct moments *m, enum moment max_moment)
103 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
104 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
105 m->max_moment = max_moment;
109 /* Clears out a set of moments so that it can be reused for a new
110 set of values. The moments to be calculated are not changed. */
112 moments_clear (struct moments *m)
119 /* Creates and returns a data structure for calculating moment
120 MAX_MOMENT and lower moments on a data series. The user
121 should call moments_pass_one() for each value in the series,
122 then call moments_pass_two() for the same set of values in the
123 same order, then call moments_calculate() to obtain the
124 moments. The user may ask for the mean at any time during the
125 first pass (using moments_calculate()), but otherwise no
126 statistics may be requested until the end of the second pass.
127 Call moments_destroy() when the moments are no longer
130 moments_create (enum moment max_moment)
132 struct moments *m = xmalloc (sizeof *m);
133 init_moments (m, max_moment);
137 /* Adds VALUE with the given WEIGHT to the calculation of
138 moments for the first pass. */
140 moments_pass_one (struct moments *m, double value, double weight)
143 assert (m->pass == 1);
145 if (value != SYSMIS && weight > 0.)
147 m->sum += value * weight;
152 /* Adds VALUE with the given WEIGHT to the calculation of
153 moments for the second pass. */
155 moments_pass_two (struct moments *m, double value, double weight)
162 m->mean = m->w1 != 0. ? m->sum / m->w1 : 0.;
163 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
166 if (value != SYSMIS && weight >= 0.)
168 double d = value - m->mean;
169 double d1_delta = d * weight;
171 if (m->max_moment >= MOMENT_VARIANCE)
173 double d2_delta = d1_delta * d;
175 if (m->max_moment >= MOMENT_SKEWNESS)
177 double d3_delta = d2_delta * d;
179 if (m->max_moment >= MOMENT_KURTOSIS)
181 double d4_delta = d3_delta * d;
190 /* Calculates moments based on the input data. Stores the total
191 weight in *WEIGHT, the mean in *MEAN, the variance in
192 *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
193 *KURTOSIS. Any of these result parameters may be null
194 pointers, in which case the values are not calculated. If any
195 result cannot be calculated, either because they are undefined
196 based on the input data or because their moments are higher
197 than the maximum requested on moments_create(), then SYSMIS is
198 stored into that result. */
200 moments_calculate (const struct moments *m,
202 double *mean, double *variance,
203 double *skewness, double *kurtosis)
209 if (variance != NULL)
211 if (skewness != NULL)
213 if (kurtosis != NULL)
219 /* How many passes so far? */
222 /* In the first pass we can only calculate the mean. */
223 if (mean != NULL && m->w1 > 0.)
224 *mean = m->sum / m->w1;
228 /* After the second pass we can calculate any stat. We
229 don't support "online" computation during the second
230 pass, so As a simple self-check, the total weight for
231 the passes must agree. */
232 assert (m->pass == 2);
233 assert (m->w1 == m->w2);
239 calc_moments (m->max_moment,
240 m->w2, m->d1, m->d2, m->d3, m->d4,
241 variance, skewness, kurtosis);
246 /* Destroys a set of moments. */
248 moments_destroy (struct moments *m)
253 /* Calculates the requested moments on the CNT values in ARRAY.
254 Each value is given a weight of 1. The total weight is stored
255 into *WEIGHT (trivially) and the mean, variance, skewness, and
256 kurtosis are stored into *MEAN, *VARIANCE, *SKEWNESS, and
257 *KURTOSIS, respectively. Any of the result pointers may be
258 null, in which case no value is stored. */
260 moments_of_doubles (const double *array, size_t cnt,
262 double *mean, double *variance,
263 double *skewness, double *kurtosis)
265 enum moment max_moment;
269 if (kurtosis != NULL)
270 max_moment = MOMENT_KURTOSIS;
271 else if (skewness != NULL)
272 max_moment = MOMENT_SKEWNESS;
273 else if (variance != NULL)
274 max_moment = MOMENT_VARIANCE;
276 max_moment = MOMENT_MEAN;
278 init_moments (&m, max_moment);
279 for (idx = 0; idx < cnt; idx++)
280 moments_pass_one (&m, array[idx], 1.);
281 for (idx = 0; idx < cnt; idx++)
282 moments_pass_two (&m, array[idx], 1.);
283 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
286 /* Calculates the requested moments on the CNT numeric values in
287 ARRAY. Each value is given a weight of 1. The total weight
288 is stored into *WEIGHT (trivially) and the mean, variance,
289 skewness, and kurtosis are stored into *MEAN, *VARIANCE,
290 *SKEWNESS, and *KURTOSIS, respectively. Any of the result
291 pointers may be null, in which case no value is stored. */
293 moments_of_values (const union value *array, size_t cnt,
295 double *mean, double *variance,
296 double *skewness, double *kurtosis)
298 enum moment max_moment;
302 if (kurtosis != NULL)
303 max_moment = MOMENT_KURTOSIS;
304 else if (skewness != NULL)
305 max_moment = MOMENT_SKEWNESS;
306 else if (variance != NULL)
307 max_moment = MOMENT_VARIANCE;
309 max_moment = MOMENT_MEAN;
311 init_moments (&m, max_moment);
312 for (idx = 0; idx < cnt; idx++)
313 moments_pass_one (&m, array[idx].f, 1.);
314 for (idx = 0; idx < cnt; idx++)
315 moments_pass_two (&m, array[idx].f, 1.);
316 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
319 /* One-pass moments. */
321 /* A set of one-pass moments. */
324 enum moment max_moment; /* Highest-order moment we're computing. */
325 double w; /* Total weight so far. */
326 double d1; /* Sum of deviations from the mean. */
327 double d2; /* Sum of squared deviations from the mean. */
328 double d3; /* Sum of cubed deviations from the mean. */
329 double d4; /* Sum of (deviations from the mean)**4. */
332 /* Initializes one-pass moments M for calculating moment
333 MAX_MOMENT and lower moments. */
335 init_moments1 (struct moments1 *m, enum moment max_moment)
338 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
339 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
340 m->max_moment = max_moment;
344 /* Clears out a set of one-pass moments so that it can be reused
345 for a new set of values. The moments to be calculated are not
348 moments1_clear (struct moments1 *m)
351 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
354 /* Creates and returns a data structure for calculating moment
355 MAX_MOMENT and lower moments on a data series in a single
356 pass. The user should call moments1_add() for each value in
357 the series. The user may call moments1_calculate() to obtain
358 the current moments at any time. Call moments1_destroy() when
359 the moments are no longer needed.
361 One-pass moments should only be used when two passes over the
362 data are impractical. */
364 moments1_create (enum moment max_moment)
366 struct moments1 *m = xmalloc (sizeof *m);
367 init_moments1 (m, max_moment);
371 /* Adds VALUE with the given WEIGHT to the calculation of
374 moments1_add (struct moments1 *m, double value, double weight)
378 if (value != SYSMIS && weight > 0.)
384 v1 = (weight / m->w) * (value - m->d1);
387 if (m->max_moment >= MOMENT_VARIANCE)
390 double w_prev_w = m->w * prev_w;
391 double prev_m2 = m->d2;
393 m->d2 += w_prev_w / weight * v2;
394 if (m->max_moment >= MOMENT_SKEWNESS)
396 double w2 = weight * weight;
398 double prev_m3 = m->d3;
400 m->d3 += (-3. * v1 * prev_m2
401 + w_prev_w / w2 * (m->w - 2. * weight) * v3);
402 if (m->max_moment >= MOMENT_KURTOSIS)
404 double w3 = w2 * weight;
407 m->d4 += (-4. * v1 * prev_m3
409 + ((pow2 (m->w) - 3. * weight * prev_w)
410 * v4 * w_prev_w / w3));
417 /* Calculates one-pass moments based on the input data. Stores
418 the total weight in *WEIGHT, the mean in *MEAN, the variance
419 in *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
420 *KURTOSIS. Any of these result parameters may be null
421 pointers, in which case the values are not calculated. If any
422 result cannot be calculated, either because they are undefined
423 based on the input data or because their moments are higher
424 than the maximum requested on moments_create(), then SYSMIS is
425 stored into that result. */
427 moments1_calculate (const struct moments1 *m,
429 double *mean, double *variance,
430 double *skewness, double *kurtosis)
436 if (variance != NULL)
438 if (skewness != NULL)
440 if (kurtosis != NULL)
451 calc_moments (m->max_moment,
452 m->w, 0., m->d2, m->d3, m->d4,
453 variance, skewness, kurtosis);
457 /* Destroy one-pass moments M. */
459 moments1_destroy (struct moments1 *m)
464 /* Returns the standard error of the skewness for the given total
467 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
468 section "DESCRIPTIVES". */
470 calc_seskew (double W)
472 return sqrt ((6. * W * (W - 1.)) / ((W - 2.) * (W + 1.) * (W + 3.)));
475 /* Returns the standard error of the kurtosis for the given total
478 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
479 section "DESCRIPTIVES", except that the sqrt symbol is omitted
482 calc_sekurt (double W)
484 return sqrt ((4. * (pow2 (W) - 1.) * pow2 (calc_seskew (W)))
485 / ((W - 3.) * (W + 5.)));