1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case.h"
20 #include "data/format.h"
21 #include "data/variable.h"
23 #include "libpspp/bt.h"
24 #include "libpspp/hmap.h"
25 #include "libpspp/misc.h"
26 #include "libpspp/pool.h"
28 #include "math/moments.h"
29 #include "output/pivot-table.h"
/* _() expands to a call of gettext () on MSGID. */
36 #define _(msgid) gettext (msgid)
/* N_() expands to MSGID itself, unchanged.  Presumably it only tags a string
   for message extraction so that it can be translated later -- TODO confirm. */
37 #define N_(msgid) (msgid)
39 /* A base struct for all statistics. */
44 /* Statistics which accumulate a single value.
   The functions in this file cast a struct statistic * directly to a
   struct statistic_simple * and back, and reach the accumulated value
   through the member named ACC. */
45 struct statistic_simple
47 struct statistic parent;
51 /* Statistics based on moments.
   The functions in this file cast a struct statistic * directly to a
   struct statistic_moment * and back, and reach the moments accumulator
   through the member named MOM. */
52 struct statistic_moment
54 struct statistic parent;
/* Creates a moment based statistic.  The struct itself is allocated from
   POOL; its moments accumulator is created separately by moments1_create,
   asking for MOMENT_KURTOSIS, and is released again by default_destroy.
   Returns the new object as a pointer to its base struct. */
59 static struct statistic *
60 default_create (struct pool *pool)
62 struct statistic_moment *pvd = pool_alloc (pool, sizeof *pvd);
64 pvd->mom = moments1_create (MOMENT_KURTOSIS);
66 return (struct statistic *) pvd;
/* Adds the value X with weight W to the moments accumulator of STAT, which
   must really be a struct statistic_moment.  Note the argument order:
   moments1_add is passed the value before the weight. */
70 default_update (struct statistic *stat, double w, double x)
72 struct statistic_moment *pvd = (struct statistic_moment *) stat;
74 moments1_add (pvd->mom, x, w);
/* Destroys the moments accumulator belonging to STAT, which must really be
   a struct statistic_moment.  The struct itself is not freed here.
   NOTE(review): presumably it is released together with the pool it was
   allocated from -- confirm. */
78 default_destroy (struct statistic *stat)
80 struct statistic_moment *pvd = (struct statistic_moment *) stat;
81 moments1_destroy (pvd->mom);
85 /* Simple statistics have nothing to destroy, so STAT is unused.
   This is the destroy function which the table of cell_specs lists for
   the statistics that are not based on moments. */
87 simple_destroy (struct statistic *stat UNUSED)
93 /* HARMONIC MEAN: The reciprocal of the arithmetic mean of the reciprocals:
94 n / (1/(x_0) + 1/(x_1) + ... + 1/(x_{n-1}))
   (harmonic_get returns N divided by RSUM accordingly.) */
98 struct statistic parent;
/* Allocates a struct harmonic_mean from POOL and returns it as a pointer
   to its base struct. */
103 static struct statistic *
104 harmonic_create (struct pool *pool)
106 struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm);
111 return (struct statistic *) hm;
/* Update function for the harmonic mean.  STAT must really be a
   struct harmonic_mean; W and X follow the same order as in
   default_update (presumably weight, then value). */
116 harmonic_update (struct statistic *stat, double w, double x)
118 struct harmonic_mean *hm = (struct harmonic_mean *) stat;
/* Returns the harmonic mean held in PVD (really a struct harmonic_mean):
   the accumulated N divided by the accumulated RSUM.
   NOTE(review): there is no guard against RSUM being zero, in which case
   the result is not finite -- confirm that callers cope with that. */
125 harmonic_get (const struct statistic *pvd)
127 const struct harmonic_mean *hm = (const struct harmonic_mean *) pvd;
129 return hm->n / hm->rsum;
134 /* GEOMETRIC MEAN: The nth root of the product of all n observations
135 pow ((x_0 * x_1 * ... * x_{n - 1}), 1/n)
   (geometric_update maintains the product in PROD; geometric_get takes
   the root using N.) */
136 struct geometric_mean
138 struct statistic parent;
/* Allocates a struct geometric_mean from POOL and returns it as a pointer
   to its base struct. */
143 static struct statistic *
144 geometric_create (struct pool *pool)
146 struct geometric_mean *gm = pool_alloc (pool, sizeof *gm);
151 return (struct statistic *) gm;
/* Multiplies the running product of PVD (really a struct geometric_mean)
   by X raised to the power W, so that a weight of W counts like W
   repetitions of X.
   NOTE(review): a running product can overflow or underflow for long or
   extreme series; accumulating W * log (X) instead would be more robust. */
155 geometric_update (struct statistic *pvd, double w, double x)
157 struct geometric_mean *gm = (struct geometric_mean *)pvd;
158 gm->prod *= pow (x, w);
/* Returns the geometric mean held in PVD (really a struct geometric_mean):
   the accumulated product raised to the power 1/N. */
164 geometric_get (const struct statistic *pvd)
166 const struct geometric_mean *gm = (const struct geometric_mean *)pvd;
167 return pow (gm->prod, 1.0 / gm->n);
172 /* The getters for moment based statistics simply calculate the
173 moment. The only exception is Std Dev. which needs to call
/* Getter for the sum.  Asks the moments accumulator of PVD for N and the
   mean only; the three remaining outputs are not requested.  Presumably
   the sum is then derived from those two values -- TODO confirm. */
177 sum_get (const struct statistic *pvd)
181 moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, 0, 0, 0);
/* Getter for N.  Asks the moments accumulator of PVD for N alone; none of
   the other outputs is requested. */
188 n_get (const struct statistic *pvd)
192 moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, 0, 0, 0, 0);
/* Getter for the arithmetic mean.  Asks the moments accumulator of PVD for
   N and the mean; the remaining outputs are not requested. */
198 arithmean_get (const struct statistic *pvd)
202 moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, 0, 0, 0);
/* Getter for the variance.  Asks the moments accumulator of PVD for N, the
   mean and the variance; skewness and kurtosis are not requested.
   stddev_get builds on the value this function returns. */
208 variance_get (const struct statistic *pvd)
210 double n, mean, variance;
212 moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, &variance, 0, 0);
/* Returns the standard deviation of PVD: the square root of the value
   returned by variance_get. */
219 stddev_get (const struct statistic *pvd)
221 return sqrt (variance_get (pvd));
/* Getter for the skewness.  Asks the moments accumulator of PVD for the
   skewness alone; every other output pointer is NULL. */
225 skew_get (const struct statistic *pvd)
229 moments1_calculate (((struct statistic_moment *)pvd)->mom, NULL, NULL, NULL, &skew, 0);
/* Getter for the statistic titled "S.E. Kurt" in the table of cell_specs.
   Obtains N from the moments accumulator of PVD and returns
   calc_sekurt (N); the result depends on N only. */
235 sekurt_get (const struct statistic *pvd)
239 moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL);
241 return calc_sekurt (n);
/* Getter for the statistic titled "S.E. Skew" in the table of cell_specs.
   Obtains N from the moments accumulator of PVD and returns
   calc_seskew (N); the result depends on N only. */
245 seskew_get (const struct statistic *pvd)
249 moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL);
251 return calc_seskew (n);
/* Getter for the kurtosis.  Asks the moments accumulator of PVD for the
   kurtosis alone; every other output pointer is NULL. */
255 kurt_get (const struct statistic *pvd)
259 moments1_calculate (((struct statistic_moment *)pvd)->mom, NULL, NULL, NULL, NULL, &kurt);
/* Getter for the statistic titled "S.E. Mean" in the table of cell_specs.
   Obtains N and the variance from the moments accumulator of PVD and
   returns the square root of variance / N.
   NOTE(review): N is not checked for zero before the division. */
265 semean_get (const struct statistic *pvd)
269 moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, &var, NULL, NULL);
271 return sqrt (var / n);
276 /* MIN: The smallest (closest to minus infinity) value. */
/* Allocates a struct statistic_simple from POOL to hold the minimum and
   returns it as a pointer to its base struct. */
278 static struct statistic *
279 min_create (struct pool *pool)
281 struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd);
285 return (struct statistic *) pvd;
/* Update function for MIN.  The weight W is unused.  R points at the
   accumulator of PVD (really a struct statistic_simple), which is what
   the new value X is weighed against. */
289 min_update (struct statistic *pvd, double w UNUSED, double x)
291 double *r = &((struct statistic_simple *)pvd)->acc;
/* Getter for MIN.  R points at the accumulator of PVD (really a
   struct statistic_simple).
   NOTE(review): the cast drops the const qualifier of PVD. */
298 min_get (const struct statistic *pvd)
300 double *r = &((struct statistic_simple *)pvd)->acc;
305 /* MAX: The largest (closest to plus infinity) value. */
/* Allocates a struct statistic_simple from POOL to hold the maximum and
   returns it as a pointer to its base struct. */
307 static struct statistic *
308 max_create (struct pool *pool)
310 struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd);
314 return (struct statistic *) pvd;
/* Update function for MAX.  The weight W is unused.  R points at the
   accumulator of PVD (really a struct statistic_simple), which is what
   the new value X is weighed against. */
318 max_update (struct statistic *pvd, double w UNUSED, double x)
320 double *r = &((struct statistic_simple *)pvd)->acc;
/* Getter for MAX.  R points at the accumulator of PVD (really a
   struct statistic_simple).
   NOTE(review): the cast drops the const qualifier of PVD. */
327 max_get (const struct statistic *pvd)
329 double *r = &((struct statistic_simple *)pvd)->acc;
/* Base struct of the range statistic.  range_create, range_update and
   range_get cast between struct statistic * and struct range *. */
338 struct statistic parent;
343 /* Initially min and max are set to their most (inverted) extreme possible
/* Allocates a struct range from POOL and returns it as a pointer to its
   base struct. */
345 static struct statistic *
346 range_create (struct pool *pool)
348 struct range *r = pool_alloc (pool, sizeof *r);
353 return (struct statistic *) r;
356 /* On each update, set min and max to X or leave unchanged,
/* Update function for RANGE.  The weight W is unused; PVD must really be
   a struct range. */
359 range_update (struct statistic *pvd, double w UNUSED, double x)
361 struct range *r = (struct range *) pvd;
370 /* Get the difference between min and max, that is, MAX minus MIN of the
   struct range which PVD really points to.
   NOTE(review): the cast drops the const qualifier of PVD before the
   result is assigned to a pointer to const; casting to
   const struct range * would avoid that. */
372 range_get (const struct statistic *pvd)
374 const struct range *r = (struct range *) pvd;
376 return r->max - r->min;
381 /* LAST: The last value (the one closest to the end of the file). */
/* Allocates a struct statistic_simple from POOL to hold the last value and
   returns it as a pointer to its base struct. */
383 static struct statistic *
384 last_create (struct pool *pool)
386 struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd);
388 return (struct statistic *) pvd;
/* Update function for LAST.  The weight W is unused; PVD must really be a
   struct statistic_simple. */
392 last_update (struct statistic *pvd, double w UNUSED, double x)
394 struct statistic_simple *stat = (struct statistic_simple *) pvd;
/* Getter for LAST.  PVD must really be a struct statistic_simple.
   NOTE(review): the cast drops the const qualifier of PVD before the
   result is assigned to a pointer to const. */
400 last_get (const struct statistic *pvd)
402 const struct statistic_simple *stat = (struct statistic_simple *) pvd;
407 /* FIRST: The first value (the one closest to the start of the file). */
/* Allocates a struct statistic_simple from POOL to hold the first value
   and returns it as a pointer to its base struct. */
409 static struct statistic *
410 first_create (struct pool *pool)
412 struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd);
416 return (struct statistic *) pvd;
/* Update function for FIRST.  The weight W is unused.  It acts only while
   the accumulator of PVD (really a struct statistic_simple) still equals
   SYSMIS, which presumably marks "no value stored yet" -- TODO confirm
   against first_create. */
420 first_update (struct statistic *pvd, double w UNUSED, double x)
422 struct statistic_simple *stat = (struct statistic_simple *) pvd;
424 if (stat->acc == SYSMIS)
/* Getter for FIRST.  PVD must really be a struct statistic_simple.
   NOTE(review): the cast drops the const qualifier of PVD before the
   result is assigned to a pointer to const. */
429 first_get (const struct statistic *pvd)
431 const struct statistic_simple *stat = (struct statistic_simple *) pvd;
436 /* Table of cell_specs, with n_MEANS_STATISTICS entries.
   Judging from the entries, each one holds, in order: a title marked with
   N_(), a keyword, a pivot result class or NULL, and the create, update,
   get and destroy functions of the statistic -- TODO confirm against the
   declaration of struct cell_spec.

   The entries for MEDIAN, GMEDIAN, NPCT and SPCT have NULL in the position
   of the get function, so that position must not be called without a
   check.  Statistics which are not based on moments use simple_destroy. */
437 const struct cell_spec cell_spec[n_MEANS_STATISTICS] = {
438 {N_("Mean"), "MEAN", NULL , default_create, default_update, arithmean_get, default_destroy},
439 {N_("N"), "COUNT", PIVOT_RC_COUNT, default_create, default_update, n_get, default_destroy},
440 {N_("Std. Deviation"), "STDDEV", NULL , default_create, default_update, stddev_get, default_destroy},
442 {N_("Median"), "MEDIAN", NULL , default_create, default_update, NULL, default_destroy},
443 {N_("Group Median"), "GMEDIAN", NULL , default_create, default_update, NULL, default_destroy},
445 {N_("S.E. Mean"), "SEMEAN", NULL , default_create, default_update, semean_get, default_destroy},
446 {N_("Sum"), "SUM", NULL , default_create, default_update, sum_get, default_destroy},
447 {N_("Minimum"), "MIN", NULL , min_create, min_update, min_get, simple_destroy},
448 {N_("Maximum"), "MAX", NULL , max_create, max_update, max_get, simple_destroy},
449 {N_("Range"), "RANGE", NULL , range_create, range_update, range_get, simple_destroy},
450 {N_("Variance"), "VARIANCE", PIVOT_RC_OTHER, default_create, default_update, variance_get, default_destroy},
451 {N_("Kurtosis"), "KURT", PIVOT_RC_OTHER, default_create, default_update, kurt_get, default_destroy},
452 {N_("S.E. Kurt"), "SEKURT", PIVOT_RC_OTHER, default_create, default_update, sekurt_get, default_destroy},
453 {N_("Skewness"), "SKEW", PIVOT_RC_OTHER, default_create, default_update, skew_get, default_destroy},
454 {N_("S.E. Skew"), "SESKEW", PIVOT_RC_OTHER, default_create, default_update, seskew_get, default_destroy},
455 {N_("First"), "FIRST", NULL , first_create, first_update, first_get, simple_destroy},
456 {N_("Last"), "LAST", NULL , last_create, last_update, last_get, simple_destroy},
458 {N_("Percent N"), "NPCT", PIVOT_RC_PERCENT, default_create, default_update, NULL, default_destroy},
459 {N_("Percent Sum"), "SPCT", PIVOT_RC_PERCENT, default_create, default_update, NULL, default_destroy},
461 {N_("Harmonic Mean"), "HARMONIC", NULL , harmonic_create, harmonic_update, harmonic_get, simple_destroy},
462 {N_("Geom. Mean"), "GEOMETRIC", NULL , geometric_create, geometric_update, geometric_get, simple_destroy}