X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fmeans-calc.c;h=6aabbf50cbad1b8448eb3de326b882f43ce2f02e;hb=4d777aeacfa602840718862c31c9059e3d289eed;hp=5e49b1802c06a0a405874182260a8ef839e4e0c0;hpb=ad7ae8bb105c925b951e241fd6a3d1cb93d614a0;p=pspp diff --git a/src/language/stats/means-calc.c b/src/language/stats/means-calc.c index 5e49b1802c..6aabbf50cb 100644 --- a/src/language/stats/means-calc.c +++ b/src/language/stats/means-calc.c @@ -26,6 +26,7 @@ #include "libpspp/pool.h" #include "math/moments.h" +#include "output/pivot-table.h" #include @@ -35,53 +36,71 @@ #define _(msgid) gettext (msgid) #define N_(msgid) (msgid) -struct per_var_data +/* A base struct for all statistics. */ +struct statistic { }; -struct per_var_data_simple +/* Statistics which accumulate a single value. */ +struct statistic_simple { - struct per_var_data parent; + struct statistic parent; double acc; }; -struct per_var_data_moment +/* Statistics based on moments. */ +struct statistic_moment { - struct per_var_data parent; + struct statistic parent; struct moments1 *mom; }; -static struct per_var_data * + +static struct statistic * default_create (struct pool *pool) { - struct per_var_data_moment *pvd = pool_alloc (pool, sizeof *pvd); + struct statistic_moment *pvd = pool_alloc (pool, sizeof *pvd); pvd->mom = moments1_create (MOMENT_KURTOSIS); - return (struct per_var_data *) pvd; + return (struct statistic *) pvd; } static void -default_update (struct per_var_data *stat, double w, double x) +default_update (struct statistic *stat, double w, double x) { - struct per_var_data_moment *pvd = (struct per_var_data_moment *)stat; + struct statistic_moment *pvd = (struct statistic_moment *) stat; moments1_add (pvd->mom, x, w); } +static void +default_destroy (struct statistic *stat) +{ + struct statistic_moment *pvd = (struct statistic_moment *) stat; + moments1_destroy (pvd->mom); +} + + +/* Simple statistics have nothing to destroy. */ +static void +simple_destroy (struct statistic *stat UNUSED) +{ +} + /* HARMONIC MEAN: The reciprocal of the sum of the reciprocals: - 1 / ( 1/(x_0) + 1/(x_1) + ... + 1/(x_{n-1}) ) */ + 1 / (1/(x_0) + 1/(x_1) + ... + 1/(x_{n-1})) */ struct harmonic_mean { - struct per_var_data parent; + struct statistic parent; double rsum; double n; }; -static struct per_var_data * +static struct statistic * harmonic_create (struct pool *pool) { struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm); @@ -89,21 +108,21 @@ harmonic_create (struct pool *pool) hm->rsum = 0; hm->n = 0; - return (struct per_var_data *) hm; + return (struct statistic *) hm; } static void -harmonic_update (struct per_var_data *stat, double w, double x) +harmonic_update (struct statistic *stat, double w, double x) { - struct harmonic_mean *hm = (struct harmonic_mean *)stat; + struct harmonic_mean *hm = (struct harmonic_mean *) stat; hm->rsum += w / x; hm->n += w; } static double -harmonic_get (const struct per_var_data *pvd) +harmonic_get (const struct statistic *pvd) { const struct harmonic_mean *hm = (const struct harmonic_mean *) pvd; @@ -116,12 +135,12 @@ harmonic_get (const struct per_var_data *pvd) pow ((x_0 * x_1 * ... x_{n - 1}), 1/n) */ struct geometric_mean { - struct per_var_data parent; + struct statistic parent; double prod; double n; }; -static struct per_var_data * +static struct statistic * geometric_create (struct pool *pool) { struct geometric_mean *gm = pool_alloc (pool, sizeof *gm); @@ -129,11 +148,11 @@ geometric_create (struct pool *pool) gm->prod = 1.0; gm->n = 0; - return (struct per_var_data *) gm; + return (struct statistic *) gm; } static void -geometric_update (struct per_var_data *pvd, double w, double x) +geometric_update (struct statistic *pvd, double w, double x) { struct geometric_mean *gm = (struct geometric_mean *)pvd; gm->prod *= pow (x, w); @@ -142,7 +161,7 @@ geometric_update (struct per_var_data *pvd, double w, double x) static double -geometric_get (const struct per_var_data *pvd) +geometric_get (const struct statistic *pvd) { const struct geometric_mean *gm = (const struct geometric_mean *)pvd; return pow (gm->prod, 1.0 / gm->n); @@ -150,103 +169,104 @@ geometric_get (const struct per_var_data *pvd) +/* The getters for moment based statistics simply calculate the + moment. The only exception is Std Dev. which needs to call + sqrt as well. */ + static double -sum_get (const struct per_var_data *pvd) +sum_get (const struct statistic *pvd) { double n, mean; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, &mean, 0, 0, 0); + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, 0, 0, 0); return mean * n; } static double -n_get (const struct per_var_data *pvd) +n_get (const struct statistic *pvd) { double n; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, 0, 0, 0, 0); + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, 0, 0, 0, 0); return n; } static double -arithmean_get (const struct per_var_data *pvd) +arithmean_get (const struct statistic *pvd) { double n, mean; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, &mean, 0, 0, 0); + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, 0, 0, 0); return mean; } static double -variance_get (const struct per_var_data *pvd) +variance_get (const struct statistic *pvd) { double n, mean, variance; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, &mean, &variance, 0, 0); + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, &mean, &variance, 0, 0); return variance; } static double -stddev_get (const struct per_var_data *pvd) +stddev_get (const struct statistic *pvd) { return sqrt (variance_get (pvd)); } - - - static double -skew_get (const struct per_var_data *pvd) +skew_get (const struct statistic *pvd) { double skew; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, NULL, NULL, NULL, &skew, 0); + moments1_calculate (((struct statistic_moment *)pvd)->mom, NULL, NULL, NULL, &skew, 0); return skew; } static double -sekurt_get (const struct per_var_data *pvd) +sekurt_get (const struct statistic *pvd) { double n; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL); + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL); return calc_sekurt (n); } static double -seskew_get (const struct per_var_data *pvd) +seskew_get (const struct statistic *pvd) { double n; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL); + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, NULL, NULL, NULL); return calc_seskew (n); } static double -kurt_get (const struct per_var_data *pvd) +kurt_get (const struct statistic *pvd) { double kurt; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, NULL, NULL, NULL, NULL, &kurt); + moments1_calculate (((struct statistic_moment *)pvd)->mom, NULL, NULL, NULL, NULL, &kurt); return kurt; } static double -semean_get (const struct per_var_data *pvd) +semean_get (const struct statistic *pvd) { double n, var; - moments1_calculate (((struct per_var_data_moment *)pvd)->mom, &n, NULL, &var, NULL, NULL); + moments1_calculate (((struct statistic_moment *)pvd)->mom, &n, NULL, &var, NULL, NULL); return sqrt (var / n); } @@ -255,58 +275,58 @@ semean_get (const struct per_var_data *pvd) /* MIN: The smallest (closest to minus infinity) value. */ -static struct per_var_data * +static struct statistic * min_create (struct pool *pool) { - struct per_var_data_simple *pvd = pool_alloc (pool, sizeof *pvd); + struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd); pvd->acc = DBL_MAX; - return (struct per_var_data *) pvd; + return (struct statistic *) pvd; } static void -min_update (struct per_var_data *pvd, double w UNUSED, double x) +min_update (struct statistic *pvd, double w UNUSED, double x) { - double *r = &((struct per_var_data_simple *)pvd)->acc; + double *r = &((struct statistic_simple *)pvd)->acc; if (x < *r) *r = x; } static double -min_get (const struct per_var_data *pvd) +min_get (const struct statistic *pvd) { - double *r = &((struct per_var_data_simple *)pvd)->acc; + double *r = &((struct statistic_simple *)pvd)->acc; return *r; } /* MAX: The largest (closest to plus infinity) value. */ -static struct per_var_data * +static struct statistic * max_create (struct pool *pool) { - struct per_var_data_simple *pvd = pool_alloc (pool, sizeof *pvd); + struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd); pvd->acc = -DBL_MAX; - return (struct per_var_data *) pvd; + return (struct statistic *) pvd; } static void -max_update (struct per_var_data *pvd, double w UNUSED, double x) +max_update (struct statistic *pvd, double w UNUSED, double x) { - double *r = &((struct per_var_data_simple *)pvd)->acc; + double *r = &((struct statistic_simple *)pvd)->acc; if (x > *r) *r = x; } static double -max_get (const struct per_var_data *pvd) +max_get (const struct statistic *pvd) { - double *r = &((struct per_var_data_simple *)pvd)->acc; + double *r = &((struct statistic_simple *)pvd)->acc; return *r; } @@ -315,12 +335,14 @@ max_get (const struct per_var_data *pvd) struct range { - struct per_var_data parent; + struct statistic parent; double min; double max; }; -static struct per_var_data * +/* Initially min and max are set to their most (inverted) extreme possible + values. */ +static struct statistic * range_create (struct pool *pool) { struct range *r = pool_alloc (pool, sizeof *r); @@ -328,11 +350,13 @@ range_create (struct pool *pool) r->min = DBL_MAX; r->max = -DBL_MAX; - return (struct per_var_data *) r; + return (struct statistic *) r; } +/* On each update, set min and max to X or leave unchanged, + as appropriate. */ static void -range_update (struct per_var_data *pvd, double w UNUSED, double x) +range_update (struct statistic *pvd, double w UNUSED, double x) { struct range *r = (struct range *) pvd; @@ -343,8 +367,9 @@ range_update (struct per_var_data *pvd, double w UNUSED, double x) r->min = x; } +/* Get the difference between min and max. */ static double -range_get (const struct per_var_data *pvd) +range_get (const struct statistic *pvd) { const struct range *r = (struct range *) pvd; @@ -355,84 +380,84 @@ range_get (const struct per_var_data *pvd) /* LAST: The last value (the one closest to the end of the file). */ -static struct per_var_data * +static struct statistic * last_create (struct pool *pool) { - struct per_var_data_simple *pvd = pool_alloc (pool, sizeof *pvd); + struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd); - return (struct per_var_data *) pvd; + return (struct statistic *) pvd; } static void -last_update (struct per_var_data *pvd, double w UNUSED, double x) +last_update (struct statistic *pvd, double w UNUSED, double x) { - struct per_var_data_simple *stat = (struct per_var_data_simple *) pvd; + struct statistic_simple *stat = (struct statistic_simple *) pvd; stat->acc = x; } static double -last_get (const struct per_var_data *pvd) +last_get (const struct statistic *pvd) { - const struct per_var_data_simple *stat = (struct per_var_data_simple *) pvd; + const struct statistic_simple *stat = (struct statistic_simple *) pvd; return stat->acc; } /* FIRST: The first value (the one closest to the start of the file). */ -static struct per_var_data * +static struct statistic * first_create (struct pool *pool) { - struct per_var_data_simple *pvd = pool_alloc (pool, sizeof *pvd); + struct statistic_simple *pvd = pool_alloc (pool, sizeof *pvd); pvd->acc = SYSMIS; - return (struct per_var_data *) pvd; + return (struct statistic *) pvd; } static void -first_update (struct per_var_data *pvd, double w UNUSED, double x) +first_update (struct statistic *pvd, double w UNUSED, double x) { - struct per_var_data_simple *stat = (struct per_var_data_simple *) pvd; + struct statistic_simple *stat = (struct statistic_simple *) pvd; if (stat->acc == SYSMIS) stat->acc = x; } static double -first_get (const struct per_var_data *pvd) +first_get (const struct statistic *pvd) { - const struct per_var_data_simple *stat = (struct per_var_data_simple *) pvd; + const struct statistic_simple *stat = (struct statistic_simple *) pvd; return stat->acc; } /* Table of cell_specs */ const struct cell_spec cell_spec[n_MEANS_STATISTICS] = { - {N_("Mean"), "MEAN", default_create, default_update, arithmean_get}, - {N_("N"), "COUNT", default_create, default_update, n_get}, - {N_("Std. Deviation"), "STDDEV", default_create, default_update, stddev_get}, + {N_("Mean"), "MEAN", NULL , default_create, default_update, arithmean_get, default_destroy}, + {N_("N"), "COUNT", PIVOT_RC_COUNT, default_create, default_update, n_get, default_destroy}, + {N_("Std. Deviation"), "STDDEV", NULL , default_create, default_update, stddev_get, default_destroy}, #if 0 - {N_("Median"), "MEDIAN", default_create, default_update, NULL}, - {N_("Group Median"), "GMEDIAN", default_create, default_update, NULL}, + {N_("Median"), "MEDIAN", NULL , default_create, default_update, NULL, default_destroy}, + {N_("Group Median"), "GMEDIAN", NULL , default_create, default_update, NULL, default_destroy}, #endif - {N_("S.E. Mean"), "SEMEAN", default_create, default_update, semean_get}, - {N_("Sum"), "SUM", default_create, default_update, sum_get}, - {N_("Minimum"), "MIN", min_create, min_update, min_get}, - {N_("Maximum"), "MAX", max_create, max_update, max_get}, - {N_("Range"), "RANGE", range_create, range_update, range_get}, - {N_("Variance"), "VARIANCE", default_create, default_update, variance_get}, - {N_("Kurtosis"), "KURT", default_create, default_update, kurt_get}, - {N_("S.E. Kurt"), "SEKURT", default_create, default_update, sekurt_get}, - {N_("Skewness"), "SKEW", default_create, default_update, skew_get}, - {N_("S.E. Skew"), "SESKEW", default_create, default_update, seskew_get}, - {N_("First"), "FIRST", first_create, first_update, first_get}, - {N_("Last"), "LAST", last_create, last_update, last_get}, + {N_("S.E. Mean"), "SEMEAN", NULL , default_create, default_update, semean_get, default_destroy}, + {N_("Sum"), "SUM", NULL , default_create, default_update, sum_get, default_destroy}, + {N_("Minimum"), "MIN", NULL , min_create, min_update, min_get, simple_destroy}, + {N_("Maximum"), "MAX", NULL , max_create, max_update, max_get, simple_destroy}, + {N_("Range"), "RANGE", NULL , range_create, range_update, range_get, simple_destroy}, + {N_("Variance"), "VARIANCE", PIVOT_RC_OTHER, default_create, default_update, variance_get, default_destroy}, + {N_("Kurtosis"), "KURT", PIVOT_RC_OTHER, default_create, default_update, kurt_get, default_destroy}, + {N_("S.E. Kurt"), "SEKURT", PIVOT_RC_OTHER, default_create, default_update, sekurt_get, default_destroy}, + {N_("Skewness"), "SKEW", PIVOT_RC_OTHER, default_create, default_update, skew_get, default_destroy}, + {N_("S.E. Skew"), "SESKEW", PIVOT_RC_OTHER, default_create, default_update, seskew_get, default_destroy}, + {N_("First"), "FIRST", NULL , first_create, first_update, first_get, simple_destroy}, + {N_("Last"), "LAST", NULL , last_create, last_update, last_get, simple_destroy}, #if 0 - {N_("Percent N"), "NPCT", default_create, default_update, NULL}, - {N_("Percent Sum"), "SPCT", default_create, default_update, NULL}, + {N_("Percent N"), "NPCT", PIVOT_RC_PERCENT, default_create, default_update, NULL, default_destroy}, + {N_("Percent Sum"), "SPCT", PIVOT_RC_PERCENT, default_create, default_update, NULL, default_destroy}, #endif - {N_("Harmonic Mean"), "HARMONIC", harmonic_create, harmonic_update, harmonic_get}, - {N_("Geom. Mean"), "GEOMETRIC", geometric_create, geometric_update, geometric_get} + {N_("Harmonic Mean"), "HARMONIC", NULL , harmonic_create, harmonic_update, harmonic_get, simple_destroy}, + {N_("Geom. Mean"), "GEOMETRIC", NULL , geometric_create, geometric_update, geometric_get, simple_destroy} };