*w = (struct box_whisker) {
.parent = {
.parent = {
- .accumulate = acc,
.destroy = destroy,
},
+ .accumulate = acc,
},
.hinges = { hinges[0], hinges[1], hinges[2] },
.whiskers = { SYSMIS, hinges[2] },
void
histogram_add (struct histogram *h, double y, double c)
{
- struct statistic *stat = &h->parent;
- stat->accumulate (stat, NULL, c, 0, y);
-}
-
-static void
-acc (struct statistic *s, const struct ccase *cx UNUSED, double c, double cc UNUSED, double y)
-{
- struct histogram *hist = UP_CAST (s, struct histogram, parent);
-
- gsl_histogram_accumulate (hist->gsl_hist, y, c);
+ gsl_histogram_accumulate (h->gsl_hist, y, c);
}
static void
}
stat = &h->parent;
- stat->accumulate = acc;
stat->destroy = destroy;
return h;
*np = (struct np) {
.parent = {
.parent = {
- .accumulate = acc,
.destroy = destroy,
},
+ .accumulate = acc,
},
.n = n,
.mean = mean,
#include "math/order-stats.h"
+#include <math.h>
#include <string.h>
#include "data/casereader.h"
}
}
- if (stat->accumulate)
- stat->accumulate (stat, cx, c_i, cc_i, y_i);
-
- tos->cc = cc_i;
+ if (tos->accumulate)
+ tos->accumulate (stat, cx, c_i, cc_i, y_i);
}
}
order statistics in OS, taking data from case index DATA_IDX and weights
from case index WEIGHT_IDX. WEIGHT_IDX may be -1 to assume weight 1.
+ This function must be used only once per order_stats.
+
Takes ownership of READER.
Data values must be numeric and sorted in ascending order. Use
for (; (cx = casereader_read (reader)) != NULL; case_unref (cx))
{
const double weight = weight_idx == -1 ? 1.0 : case_num_idx (cx, weight_idx);
- if (weight == SYSMIS)
+ if (weight == SYSMIS || weight <= 0)
continue;
const double this_value = case_num_idx (cx, data_idx);
- assert (this_value >= prev_value);
+ if (!isfinite (this_value) || this_value == SYSMIS)
+ continue;
- if (prev_value == -DBL_MAX || prev_value == this_value)
- c_i += weight;
+ if (!prev_cx || this_value > prev_value)
+ {
+ if (prev_cx)
+ update_k_values (prev_cx, prev_value, c_i, cc_i, os, n_os);
+ prev_value = this_value;
+ c_i = weight;
+ }
+ else
+ {
+ /* Data values must be sorted. */
+ assert (this_value == prev_value);
+
+ c_i += weight;
+ }
- if (prev_value > -DBL_MAX && this_value > prev_value)
- {
- update_k_values (prev_cx, prev_value, c_i, cc_i, os, n_os);
- c_i = weight;
- }
-
- case_unref (prev_cx);
cc_i += weight;
- prev_value = this_value;
+ case_unref (prev_cx);
prev_cx = case_ref (cx);
}
-
- update_k_values (prev_cx, prev_value, c_i, cc_i, os, n_os);
- case_unref (prev_cx);
+ if (prev_cx)
+ {
+ update_k_values (prev_cx, prev_value, c_i, cc_i, os, n_os);
+ case_unref (prev_cx);
+ }
casereader_destroy (reader);
}
WEIGHT_VAR. Drops cases for which the value of DATA_VAR is missing
according to EXCLUDE. WEIGHT_VAR may be NULL to assume weight 1.
+ This function must be used only once per order_stats.
+
Takes ownership of READER.
DATA_VAR must be numeric and sorted in ascending order. Use
/* Support for order statistics.
- This data structure supplies infrastructure for higher-level statistics that
- rely on order statistics. It is a kind of "abstract base class" that is not
- useful on its own. The common pattern for using the statistics based on it
+ The kth order statistic of a statistical sample is equal to its kth-smallest
+ value. The minimum is the first order statistic and the maximum is the
+ largest. This code and data structure supplies infrastructure for
+ higher-level statistics that rely on order statistics. It is a kind of
+ "abstract base class" that is not useful on its own.
+
+ This is implemented here as a kind of "partial frequency table". The
+ order_stats_accumulate() and order_stats_accumulate_idx() functions
+ effectively generate all of the frequency table entries for the variable,
+ one by one, and pass them to the "accumulate" function, if any. They can
+ also record pairs of frequency tables entries surrounding desired target
+ cumulative weights in 'k' data structures.
+
+ Client use
+ ==========
+
+ The common pattern for clients to use statistics based on order statistics
is this:
- Create the higher-level statistic with, for example, percentile_create().
struct casereader;
struct variable;
-/*
- cc <= tc < cc_p1
+/* A pair of adjacent frequency table entries.
+
+ cc <= tc < cc_p1
*/
struct k
{
+ /* Target cumulative weight.
+ Set by the client before invoking order_stats_accumulate{,_idx}. */
double tc;
- double cc;
- double cc_p1;
- double c;
- double c_p1;
- double y;
- double y_p1;
+
+ /* Lower order statistics. */
+ double cc; /* Largest cumulative weight <= tc. */
+ double c; /* Weight for data values equal to 'y'. */
+ double y; /* Data value. */
+
+ /* Upper order statistics. */
+ double cc_p1; /* Smallest cumulative weight > tc. */
+ double c_p1; /* Weight for data values equal to 'y_p1'. */
+ double y_p1; /* Data value. */
};
/* Order statistics calculation data structure. See the comment at the top of
struct order_stats
{
struct statistic parent;
- int n_k;
- struct k *k;
- double cc;
+ void (*accumulate) (struct statistic *, const struct ccase *, double c, double cc, double y);
+
+ struct k *k;
+ size_t n_k;
};
void order_stats_accumulate_idx (struct order_stats **os, size_t n_os,
free (ptl);
}
-
/* Create the Pth percentile.
W is the total sum of weights in the data set.
*/
struct percentile *
percentile_create (double p, double W)
{
- struct percentile *ptl = XZALLOC (struct percentile);
- struct order_stats *os = &ptl->parent;
- struct statistic *stat = &os->parent;
-
assert (p >= 0);
assert (p <= 1.0);
- ptl->ptile = p;
- ptl->w = W;
-
- os->n_k = 2;
- os->k = ptl->k;
- os->k[0].tc = W * p;
- os->k[1].tc = (W + 1.0) * p;
-
- ptl->g1 = ptl->g1_star = SYSMIS;
- ptl->g2 = ptl->g2_star = SYSMIS;
-
- os->k[1].y_p1 = os->k[1].y = SYSMIS;
- os->k[0].y_p1 = os->k[0].y = SYSMIS;
-
- stat->destroy = destroy;
-
+ struct percentile *ptl = xmalloc (sizeof *ptl);
+ *ptl = (struct percentile) {
+ .parent = {
+ .parent = {
+ .destroy = destroy
+ },
+ .k = ptl->k,
+ .n_k = 2,
+ },
+ .ptile = p,
+ .w = W,
+ .g1 = SYSMIS,
+ .g1_star = SYSMIS,
+ .g2 = SYSMIS,
+ .g2_star = SYSMIS,
+ .k[0] = { .tc = W * p, .y = SYSMIS, .y_p1 = SYSMIS },
+ .k[1] = { .tc = (W + 1.0) * p, .y = SYSMIS, .y_p1 = SYSMIS },
+ };
return ptl;
}
-
sw->warned = false;
- stat->accumulate = acc;
+ os->accumulate = acc;
stat->destroy = destroy;
return sw;
struct statistic
{
- void (*accumulate) (struct statistic *, const struct ccase *cx, double c, double cc, double y);
void (*destroy) (struct statistic *);
};
*tm = (struct trimmed_mean) {
.parent = {
.parent = {
- .accumulate = acc,
.destroy = destroy,
},
+ .accumulate = acc,
.k = tm->k,
.n_k = 2,
},