1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #define _(msgid) gettext (msgid)
32 /* Calculates variance, skewness, and kurtosis into *VARIANCE,
33 *SKEWNESS, and *KURTOSIS if they are non-null and not greater
34 moments than MAX_MOMENT. Accepts W as the total weight, D1 as
35 the total deviation from the estimated mean, and D2, D3, and
36 D4 as the sum of the squares, cubes, and 4th powers,
37 respectively, of the deviation from the estimated mean. */
39 calc_moments (enum moment max_moment,
40 double w, double d1, double d2, double d3, double d4,
41 double *variance, double *skewness, double *kurtosis)
45 if (max_moment >= MOMENT_VARIANCE && w > 1.)
49 /* From _Numerical Recipes in C_, 2nd ed., 0-521-43108-5,
51 s2 = (d2 - pow2 (d1) / w) / (w - 1.);
55 /* From _SPSS Statistical Algorithms, 2nd ed.,
56 0-918469-89-9, section "DESCRIPTIVES". */
57 if (fabs (*variance) >= 1e-20)
59 if (max_moment >= MOMENT_SKEWNESS && skewness != NULL && w > 2.)
61 double s3 = s2 * sqrt (s2);
62 double g1 = (w * d3) / ((w - 1.0) * (w - 2.0) * s3);
66 if (max_moment >= MOMENT_KURTOSIS && kurtosis != NULL && w > 3.)
68 double den = (w - 2.) * (w - 3.) * pow2 (s2);
69 double g2 = (w * (w + 1) * d4 / (w - 1.) / den
70 - 3. * pow2 (d2) / den);
78 /* Two-pass moments. */
80 /* A set of two-pass moments. */
83 enum moment max_moment; /* Highest-order moment we're computing. */
84 int pass; /* Current pass (1 or 2). */
87 double w1; /* Total weight for pass 1, so far. */
88 double sum; /* Sum of values so far. */
89 double mean; /* Mean = sum / w1. */
92 double w2; /* Total weight for pass 2, so far. */
93 double d1; /* Sum of deviations from the mean. */
94 double d2; /* Sum of squared deviations from the mean. */
95 double d3; /* Sum of cubed deviations from the mean. */
96 double d4; /* Sum of (deviations from the mean)**4. */
99 /* Initializes moments M for calculating moment MAX_MOMENT and
102 init_moments (struct moments *m, enum moment max_moment)
105 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
106 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
107 m->max_moment = max_moment;
111 /* Clears out a set of moments so that it can be reused for a new
112 set of values. The moments to be calculated are not changed. */
114 moments_clear (struct moments *m)
121 /* Creates and returns a data structure for calculating moment
122 MAX_MOMENT and lower moments on a data series. The user
123 should call moments_pass_one() for each value in the series,
124 then call moments_pass_two() for the same set of values in the
125 same order, then call moments_calculate() to obtain the
126 moments. The user may ask for the mean at any time during the
127 first pass (using moments_calculate()), but otherwise no
128 statistics may be requested until the end of the second pass.
129 Call moments_destroy() when the moments are no longer
132 moments_create (enum moment max_moment)
134 struct moments *m = xmalloc (sizeof *m);
135 init_moments (m, max_moment);
139 /* Adds VALUE with the given WEIGHT to the calculation of
140 moments for the first pass. */
142 moments_pass_one (struct moments *m, double value, double weight)
145 assert (m->pass == 1);
147 if (value != SYSMIS && weight > 0.)
149 m->sum += value * weight;
154 /* Adds VALUE with the given WEIGHT to the calculation of
155 moments for the second pass. */
157 moments_pass_two (struct moments *m, double value, double weight)
166 m->mean = m->w1 != 0. ? m->sum / m->w1 : 0.;
167 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
170 if (value != SYSMIS && weight >= 0.)
174 d = d_power = value - m->mean;
175 m->d1 += d_power * weight;
177 if (m->max_moment >= MOMENT_VARIANCE)
180 m->d2 += d_power * weight;
182 if (m->max_moment >= MOMENT_SKEWNESS)
185 m->d3 += d_power * weight;
187 if (m->max_moment >= MOMENT_KURTOSIS)
190 m->d4 += d_power * weight;
197 /* Calculates moments based on the input data. Stores the total
198 weight in *WEIGHT, the mean in *MEAN, the variance in
199 *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
200 *KURTOSIS. Any of these result parameters may be null
201 pointers, in which case the values are not calculated. If any
202 result cannot be calculated, either because they are undefined
203 based on the input data or because their moments are higher
204 than the maximum requested on moments_create(), then SYSMIS is
205 stored into that result. */
207 moments_calculate (const struct moments *m,
209 double *mean, double *variance,
210 double *skewness, double *kurtosis)
216 if (variance != NULL)
218 if (skewness != NULL)
220 if (kurtosis != NULL)
226 /* How many passes so far? */
229 /* In the first pass we can only calculate the mean. */
230 if (mean != NULL && m->w1 > 0.)
231 *mean = m->sum / m->w1;
235 /* After the second pass we can calculate any stat. We
236 don't support "online" computation during the second
237 pass, so As a simple self-check, the total weight for
238 the passes must agree. */
239 assert (m->pass == 2);
240 assert (m->w1 == m->w2);
246 calc_moments (m->max_moment,
247 m->w2, m->d1, m->d2, m->d3, m->d4,
248 variance, skewness, kurtosis);
253 /* Destroys a set of moments. */
255 moments_destroy (struct moments *m)
260 /* Calculates the requested moments on the CNT values in ARRAY.
261 Each value is given a weight of 1. The total weight is stored
262 into *WEIGHT (trivially) and the mean, variance, skewness, and
263 kurtosis are stored into *MEAN, *VARIANCE, *SKEWNESS, and
264 *KURTOSIS, respectively. Any of the result pointers may be
265 null, in which case no value is stored. */
267 moments_of_doubles (const double *array, size_t cnt,
269 double *mean, double *variance,
270 double *skewness, double *kurtosis)
272 enum moment max_moment;
276 if (kurtosis != NULL)
277 max_moment = MOMENT_KURTOSIS;
278 else if (skewness != NULL)
279 max_moment = MOMENT_SKEWNESS;
280 else if (variance != NULL)
281 max_moment = MOMENT_VARIANCE;
283 max_moment = MOMENT_MEAN;
285 init_moments (&m, max_moment);
286 for (idx = 0; idx < cnt; idx++)
287 moments_pass_one (&m, array[idx], 1.);
288 for (idx = 0; idx < cnt; idx++)
289 moments_pass_two (&m, array[idx], 1.);
290 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
293 /* Calculates the requested moments on the CNT numeric values in
294 ARRAY. Each value is given a weight of 1. The total weight
295 is stored into *WEIGHT (trivially) and the mean, variance,
296 skewness, and kurtosis are stored into *MEAN, *VARIANCE,
297 *SKEWNESS, and *KURTOSIS, respectively. Any of the result
298 pointers may be null, in which case no value is stored. */
300 moments_of_values (const union value *array, size_t cnt,
302 double *mean, double *variance,
303 double *skewness, double *kurtosis)
305 enum moment max_moment;
309 if (kurtosis != NULL)
310 max_moment = MOMENT_KURTOSIS;
311 else if (skewness != NULL)
312 max_moment = MOMENT_SKEWNESS;
313 else if (variance != NULL)
314 max_moment = MOMENT_VARIANCE;
316 max_moment = MOMENT_MEAN;
318 init_moments (&m, max_moment);
319 for (idx = 0; idx < cnt; idx++)
320 moments_pass_one (&m, array[idx].f, 1.);
321 for (idx = 0; idx < cnt; idx++)
322 moments_pass_two (&m, array[idx].f, 1.);
323 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
326 /* One-pass moments. */
328 /* A set of one-pass moments. */
331 enum moment max_moment; /* Highest-order moment we're computing. */
332 double w; /* Total weight so far. */
333 double d1; /* Sum of deviations from the mean. */
334 double d2; /* Sum of squared deviations from the mean. */
335 double d3; /* Sum of cubed deviations from the mean. */
336 double d4; /* Sum of (deviations from the mean)**4. */
339 /* Initializes one-pass moments M for calculating moment
340 MAX_MOMENT and lower moments. */
342 init_moments1 (struct moments1 *m, enum moment max_moment)
345 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
346 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
347 m->max_moment = max_moment;
351 /* Clears out a set of one-pass moments so that it can be reused
352 for a new set of values. The moments to be calculated are not
355 moments1_clear (struct moments1 *m)
358 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
361 /* Creates and returns a data structure for calculating moment
362 MAX_MOMENT and lower moments on a data series in a single
363 pass. The user should call moments1_add() for each value in
364 the series. The user may call moments1_calculate() to obtain
365 the current moments at any time. Call moments1_destroy() when
366 the moments are no longer needed.
368 One-pass moments should only be used when two passes over the
369 data are impractical. */
371 moments1_create (enum moment max_moment)
373 struct moments1 *m = xmalloc (sizeof *m);
374 init_moments1 (m, max_moment);
378 /* Adds VALUE with the given WEIGHT to the calculation of
381 moments1_add (struct moments1 *m, double value, double weight)
385 if (value != SYSMIS && weight > 0.)
391 v1 = (weight / m->w) * (value - m->d1);
394 if (m->max_moment >= MOMENT_VARIANCE)
397 double w_prev_w = m->w * prev_w;
398 double prev_m2 = m->d2;
400 m->d2 += w_prev_w / weight * v2;
401 if (m->max_moment >= MOMENT_SKEWNESS)
403 double w2 = weight * weight;
405 double prev_m3 = m->d3;
407 m->d3 += (-3. * v1 * prev_m2
408 + w_prev_w / w2 * (m->w - 2. * weight) * v3);
409 if (m->max_moment >= MOMENT_KURTOSIS)
411 double w3 = w2 * weight;
414 m->d4 += (-4. * v1 * prev_m3
416 + ((pow2 (m->w) - 3. * weight * prev_w)
417 * v4 * w_prev_w / w3));
424 /* Calculates one-pass moments based on the input data. Stores
425 the total weight in *WEIGHT, the mean in *MEAN, the variance
426 in *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
427 *KURTOSIS. Any of these result parameters may be null
428 pointers, in which case the values are not calculated. If any
429 result cannot be calculated, either because they are undefined
430 based on the input data or because their moments are higher
431 than the maximum requested on moments_create(), then SYSMIS is
432 stored into that result. */
434 moments1_calculate (const struct moments1 *m,
436 double *mean, double *variance,
437 double *skewness, double *kurtosis)
443 if (variance != NULL)
445 if (skewness != NULL)
447 if (kurtosis != NULL)
458 calc_moments (m->max_moment,
459 m->w, 0., m->d2, m->d3, m->d4,
460 variance, skewness, kurtosis);
464 /* Destroy one-pass moments M. */
466 moments1_destroy (struct moments1 *m)
471 /* Returns the standard error of the skewness for the given total
474 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
475 section "DESCRIPTIVES". */
477 calc_seskew (double W)
479 return sqrt ((6. * W * (W - 1.)) / ((W - 2.) * (W + 1.) * (W + 3.)));
482 /* Returns the standard error of the kurtosis for the given total
485 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
486 section "DESCRIPTIVES", except that the sqrt symbol is omitted
489 calc_sekurt (double W)
491 return sqrt ((4. * (pow2 (W) - 1.) * pow2 (calc_seskew (W)))
492 / ((W - 3.) * (W + 5.)));
500 read_values (double **values, double **weights, size_t *cnt)
507 while (lex_is_number ())
509 double value = tokval;
514 if (!lex_is_number ())
516 lex_error (_("expecting weight value"));
526 *values = xnrealloc (*values, cap, sizeof **values);
527 *weights = xnrealloc (*weights, cap, sizeof **weights);
530 (*values)[*cnt] = value;
531 (*weights)[*cnt] = weight;
539 cmd_debug_moments (void)
541 int retval = CMD_FAILURE;
542 double *values = NULL;
543 double *weights = NULL;
549 if (lex_match_id ("ONEPASS"))
553 lex_force_match ('/');
556 fprintf (stderr, "%s => ", lex_rest_of_line (NULL));
561 struct moments *m = NULL;
563 m = moments_create (MOMENT_KURTOSIS);
564 if (!read_values (&values, &weights, &cnt))
569 for (i = 0; i < cnt; i++)
570 moments_pass_one (m, values[i], weights[i]);
571 for (i = 0; i < cnt; i++)
572 moments_pass_two (m, values[i], weights[i]);
573 moments_calculate (m, &weight, &M[0], &M[1], &M[2], &M[3]);
578 struct moments1 *m = NULL;
580 m = moments1_create (MOMENT_KURTOSIS);
581 if (!read_values (&values, &weights, &cnt))
583 moments1_destroy (m);
586 for (i = 0; i < cnt; i++)
587 moments1_add (m, values[i], weights[i]);
588 moments1_calculate (m, &weight, &M[0], &M[1], &M[2], &M[3]);
589 moments1_destroy (m);
592 fprintf (stderr, "W=%.3f", weight);
593 for (i = 0; i < 4; i++)
595 fprintf (stderr, " M%d=", i + 1);
597 fprintf (stderr, "sysmis");
598 else if (fabs (M[i]) <= 0.0005)
599 fprintf (stderr, "0.000");
601 fprintf (stderr, "%.3f", M[i]);
603 fprintf (stderr, "\n");
605 retval = lex_end_of_command ();