X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Forder-stats.h;h=4bb69383fb7944a22b12f0ce3d8328e2d27abfff;hb=499552917fce3a3da9d0fa826e1589a1c62acf27;hp=03011ea32c4116a3045e45a61e532c7dfa57bd87;hpb=46ad74de6bcf18e2a59b208eee18648f27b08272;p=pspp diff --git a/src/math/order-stats.h b/src/math/order-stats.h index 03011ea32c..4bb69383fb 100644 --- a/src/math/order-stats.h +++ b/src/math/order-stats.h @@ -17,6 +17,39 @@ #ifndef __ORDER_STATS_H__ #define __ORDER_STATS_H__ +/* Support for order statistics. + + The kth order statistic of a statistical sample is equal to its kth-smallest + value. The minimum is the first order statistic and the maximum is the + largest. This code and data structure supply infrastructure for + higher-level statistics that rely on order statistics. It is a kind of + "abstract base class" that is not useful on its own. + + This is implemented here as a kind of "partial frequency table". The + order_stats_accumulate() and order_stats_accumulate_idx() functions + effectively generate all of the frequency table entries for the variable, + one by one, and pass them to the "accumulate" function, if any. They can + also record pairs of frequency table entries surrounding desired target + cumulative weights in 'k' data structures. + + Client use + ========== + + The common pattern for clients to use statistics based on order statistics + is this: + + - Create the higher-level statistic with, for example, percentile_create(). + + - Feed in all the data with order_stats_accumulate() or + order_stats_accumulate_idx(). The data must be in sorted order: if + necessary, use one of the sorting functions from sort.h to sort them. + + - Obtain the desired results by examining the higher-level data structure or + by calling an appropriate function, e.g. percentile_calculate(). + + - Destroy the data structure with statistic_destroy(). 
+*/ + #include <stddef.h> #include "data/missing-values.h" #include "math/statistic.h" @@ -24,45 +57,50 @@ struct casereader; struct variable; -/* - cc <= tc < cc_p1 +/* A pair of adjacent frequency table entries. + + cc <= tc < cc_p1 */ struct k { + /* Target cumulative weight. + Set by the client before invoking order_stats_accumulate{,_idx}. */ double tc; - double cc; - double cc_p1; - double c; - double c_p1; - double y; - double y_p1; -}; + /* Lower order statistics. */ + double cc; /* Largest cumulative weight <= tc. */ + double c; /* Weight for data values equal to 'y'. */ + double y; /* Data value. */ + + /* Upper order statistics. */ + double cc_p1; /* Smallest cumulative weight > tc. */ + double c_p1; /* Weight for data values equal to 'y_p1'. */ + double y_p1; /* Data value. */ +}; +/* Order statistics calculation data structure. See the comment at the top of + this file for usage details. */ struct order_stats { struct statistic parent; - int n_k; - struct k *k; - - double cc; -}; -enum mv_class; - -void order_stats_dump (const struct order_stats *os); - -void -order_stats_accumulate_idx (struct order_stats **os, size_t nos, - struct casereader *reader, - int wt_idx, - int val_idx); + void (*accumulate) (struct statistic *, const struct ccase *, double c, double cc, double y); + struct k *k; + size_t n_k; +}; -void order_stats_accumulate (struct order_stats **ptl, size_t nos, - struct casereader *reader, - const struct variable *wv, - const struct variable *var, +void order_stats_accumulate_idx (struct order_stats **os, size_t n_os, + struct casereader *reader, + int weight_idx, + int data_idx); +void order_stats_accumulate (struct order_stats **os, size_t n_os, + struct casereader *, + const struct variable *weight_var, + const struct variable *data_var, enum mv_class exclude); +/* Debugging support. */ +void order_stats_dump (const struct order_stats *); + #endif