1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
29 /* FIXME? _SPSS Statistical Algorithms_ in the DESCRIPTIVES
30 second describes a "provisional means algorithm" that might be
31 useful for improving accuracy when we only do one pass. */
33 /* A set of moments in process of calculation. */
36 enum moment max_moment; /* Highest-order moment we're computing. */
37 int pass; /* Current pass (1 or 2). */
40 double w1; /* Total weight for pass 1, so far. */
41 double sum; /* Sum of values so far. */
42 double mean; /* Mean = sum / w1. */
45 double w2; /* Total weight for pass 2, so far. */
46 double d1; /* Sum of deviations from the mean. */
47 double d2; /* Sum of squared deviations from the mean. */
48 double d3; /* Sum of cubed deviations from the mean. */
49 double d4; /* Sum of (deviations from the mean)**4. */
52 /* Initializes moments M for calculating moment MAX_MOMENT and
55 init_moments (struct moments *m, enum moment max_moment)
58 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
59 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
60 m->max_moment = max_moment;
64 /* Clears out a set of moments so that it can be reused for a new
65 set of values. The moments to be calculated are not changed. */
67 moments_clear (struct moments *m)
74 /* Creates and returns a data structure for calculating moment
75 MAX_MOMENT and lower moments on a data series. For greatest
76 accuracy, the user should call moments_pass_one() for each
77 value in the series, then call moments_pass_two() for the same
78 set of values in the same order, then call moments_calculate()
79 to obtain the moments. At a cost of reduced accuracy, the
80 first pass can be skipped. In either case, moments_destroy()
81 should be called when the moments are no longer needed. */
83 moments_create (enum moment max_moment)
85 struct moments *m = xmalloc (sizeof *m);
86 init_moments (m, max_moment);
90 /* Adds VALUE with the given WEIGHT to the calculation of
91 moments for the first pass. */
93 moments_pass_one (struct moments *m, double value, double weight)
96 assert (m->pass == 1);
98 if (value != SYSMIS && weight >= 0.)
100 m->sum += value * weight;
105 /* Adds VALUE with the given WEIGHT to the calculation of
106 moments for the second pass. */
108 moments_pass_two (struct moments *m, double value, double weight)
117 m->mean = m->w1 != 0. ? m->sum / m->w1 : 0.;
118 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
121 if (value != SYSMIS && weight >= 0.)
125 d = d_power = value - m->mean;
126 m->d1 += d_power * weight;
128 if (m->max_moment >= MOMENT_VARIANCE)
131 m->d2 += d_power * weight;
133 if (m->max_moment >= MOMENT_SKEWNESS)
136 m->d3 += d_power * weight;
138 if (m->max_moment >= MOMENT_KURTOSIS)
141 m->d4 += d_power * weight;
148 /* Calculates moments based on the input data. Stores the total
149 weight in *WEIGHT, the mean in *MEAN, the variance in
150 *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
151 *KURTOSIS. Any of these result parameters may be null
152 pointers, in which case the values are not calculated. If any
153 result cannot be calculated, either because they are undefined
154 based on the input data or because their moments are higher
155 than the maximum requested on moments_create(), then SYSMIS is
156 stored into that result. */
158 moments_calculate (const struct moments *m,
160 double *mean, double *variance,
161 double *skewness, double *kurtosis)
167 assert (m->pass == 2);
169 one_pass = m->w1 == 0.;
171 /* If passes 1 and 2 are used, then w1 and w2 must agree. */
172 assert (one_pass || m->w1 == m->w2);
176 if (variance != NULL)
178 if (skewness != NULL)
180 if (kurtosis != NULL)
190 *mean = m->mean + m->d1 / W;
192 if (m->max_moment >= MOMENT_VARIANCE && W > 1.)
196 /* From _Numerical Recipes in C_, 2nd ed., 0-521-43108-5,
198 if (variance == NULL)
199 variance = &variance_tmp;
200 *variance = (m->d2 - pow2 (m->d1) / W) / (W - 1.);
202 /* From _SPSS Statistical Algorithms, 2nd ed.,
203 0-918469-89-9, section "DESCRIPTIVES". */
204 if (fabs (*variance) >= 1e-20)
206 if (m->max_moment >= MOMENT_SKEWNESS && skewness != NULL && W > 2.)
208 *skewness = ((W * m->d3 - 3.0 * m->d1 * m->d2 + 2.0
210 / ((W - 1.0) * (W - 2.0)
211 * *variance * sqrt (*variance)));
212 if (!finite (*skewness))
215 if (m->max_moment >= MOMENT_KURTOSIS && kurtosis != NULL && W > 3.)
217 *kurtosis = (((W + 1) * (W * m->d4
218 - 4.0 * m->d1 * m->d3
219 + 6.0 * pow2 (m->d1) * m->d2 / W
220 - 3.0 * pow4 (m->d1) / pow2 (W)))
221 / ((W - 1.0) * (W - 2.0) * (W - 3.0)
223 - (3.0 * pow2 (W - 1.0))
224 / ((W - 2.0) * (W - 3.)));
225 if (!finite (*kurtosis))
232 /* Destroys a set of moments. */
234 moments_destroy (struct moments *m)
239 /* Calculates the requested moments on the CNT values in ARRAY.
240 Each value is given a weight of 1. The total weight is stored
241 into *WEIGHT (trivially) and the mean, variance, skewness, and
242 kurtosis are stored into *MEAN, *VARIANCE, *SKEWNESS, and
243 *KURTOSIS, respectively. Any of the result pointers may be
244 null, in which case no value is stored. */
246 moments_of_doubles (const double *array, size_t cnt,
248 double *mean, double *variance,
249 double *skewness, double *kurtosis)
251 enum moment max_moment;
255 if (kurtosis != NULL)
256 max_moment = MOMENT_KURTOSIS;
257 else if (skewness != NULL)
258 max_moment = MOMENT_SKEWNESS;
259 else if (variance != NULL)
260 max_moment = MOMENT_VARIANCE;
262 max_moment = MOMENT_MEAN;
264 init_moments (&m, max_moment);
265 for (idx = 0; idx < cnt; idx++)
266 moments_pass_one (&m, array[idx], 1.);
267 for (idx = 0; idx < cnt; idx++)
268 moments_pass_two (&m, array[idx], 1.);
269 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
272 /* Calculates the requested moments on the CNT numeric values in
273 ARRAY. Each value is given a weight of 1. The total weight
274 is stored into *WEIGHT (trivially) and the mean, variance,
275 skewness, and kurtosis are stored into *MEAN, *VARIANCE,
276 *SKEWNESS, and *KURTOSIS, respectively. Any of the result
277 pointers may be null, in which case no value is stored. */
279 moments_of_values (const union value *array, size_t cnt,
281 double *mean, double *variance,
282 double *skewness, double *kurtosis)
284 enum moment max_moment;
288 if (kurtosis != NULL)
289 max_moment = MOMENT_KURTOSIS;
290 else if (skewness != NULL)
291 max_moment = MOMENT_SKEWNESS;
292 else if (variance != NULL)
293 max_moment = MOMENT_VARIANCE;
295 max_moment = MOMENT_MEAN;
297 init_moments (&m, max_moment);
298 for (idx = 0; idx < cnt; idx++)
299 moments_pass_one (&m, array[idx].f, 1.);
300 for (idx = 0; idx < cnt; idx++)
301 moments_pass_two (&m, array[idx].f, 1.);
302 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
305 /* Returns the standard error of the skewness for the given total
308 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
309 section "DESCRIPTIVES". */
311 calc_seskew (double W)
313 return sqrt ((6. * W * (W - 1.)) / ((W - 2.) * (W + 1.) * (W + 3.)));
316 /* Returns the standard error of the kurtosis for the given total
319 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
320 section "DESCRIPTIVES", except that the sqrt symbol is omitted
323 calc_sekurt (double W)
325 return sqrt ((4. * (pow2 (W) - 1.) * pow2 (calc_seskew (W)))
326 / ((W - 3.) * (W + 5.)));
334 read_values (double **values, double **weights, size_t *cnt)
341 while (token == T_NUM)
343 double value = tokval;
350 lex_error (_("expecting weight value"));
360 *values = xrealloc (*values, sizeof **values * cap);
361 *weights = xrealloc (*weights, sizeof **weights * cap);
364 (*values)[*cnt] = value;
365 (*weights)[*cnt] = weight;
373 cmd_debug_moments (void)
375 int retval = CMD_FAILURE;
376 struct moments *m = NULL;
377 double *values = NULL;
378 double *weights = NULL;
384 if (lex_match_id ("ONEPASS"))
388 lex_force_match ('/');
391 fprintf (stderr, "%s => ", lex_rest_of_line (NULL));
394 m = moments_create (MOMENT_KURTOSIS);
395 if (!read_values (&values, &weights, &cnt))
399 for (i = 0; i < cnt; i++)
400 moments_pass_one (m, values[i], weights[i]);
402 for (i = 0; i < cnt; i++)
403 moments_pass_two (m, values[i], weights[i]);
404 moments_calculate (m, &weight, &M[0], &M[1], &M[2], &M[3]);
406 fprintf (stderr, "W=%.3f", weight);
407 for (i = 0; i < 4; i++)
409 fprintf (stderr, " M%d=", i + 1);
411 fprintf (stderr, "sysmis");
412 else if (fabs (M[i]) <= 0.0005)
413 fprintf (stderr, "0.000");
415 fprintf (stderr, "%.3f", M[i]);
417 fprintf (stderr, "\n");
419 retval = lex_end_of_command ();