1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2008, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #ifndef __ORDER_STATS_H__
18 #define __ORDER_STATS_H__
20 /* Support for order statistics.
22 The kth order statistic of a statistical sample is equal to its kth-smallest
23 value. The minimum is the first order statistic and the maximum is the
24 nth (last) for a sample of size n. This code and data structure supplies
25 infrastructure for higher-level statistics that rely on order statistics. It
26 is a kind of "abstract base class" that is not useful on its own.
28 This is implemented here as a kind of "partial frequency table". The
29 order_stats_accumulate() and order_stats_accumulate_idx() functions
30 effectively generate all of the frequency table entries for the variable,
31 one by one, and pass them to the "accumulate" function, if any. They can
32 also record pairs of frequency table entries surrounding desired target
33 cumulative weights in 'k' data structures.
38 The common pattern for clients to use statistics based on order statistics is:
41 - Create the higher-level statistic with, for example, percentile_create().
43 - Feed in all the data with order_stats_accumulate() or
44 order_stats_accumulate_idx(). The data must be in sorted order: if
45 necessary, use one of the sorting functions from sort.h to sort them.
47 - Obtain the desired results by examining the higher-level data structure or
48 by calling an appropriate function, e.g. percentile_calculate().
50 - Destroy the data structure with statistic_destroy().
54 #include "data/missing-values.h"
55 #include "math/statistic.h"
60 /* A pair of adjacent frequency table entries.
66 /* Target cumulative weight.
67 Set by the client before invoking order_stats_accumulate{,_idx}. */
70 /* Lower order statistics. */
71 double cc; /* Largest cumulative weight <= tc. */
72 double c; /* Weight for data values equal to 'y'. */
73 double y; /* Data value. */
75 /* Upper order statistics. */
76 double cc_p1; /* Smallest cumulative weight > tc. */
77 double c_p1; /* Weight for data values equal to 'y_p1'. */
78 double y_p1; /* Data value. */
81 /* Order statistics calculation data structure. See the comment at the top of
82 this file for usage details. */
85 struct statistic parent;
87 void (*accumulate) (struct statistic *, const struct ccase *, double c, double cc, double y);
93 void order_stats_accumulate_idx (struct order_stats **os, size_t n_os,
94 struct casereader *reader,
97 void order_stats_accumulate (struct order_stats **os, size_t n_os,
99 const struct variable *weight_var,
100 const struct variable *data_var,
101 enum mv_class exclude);
103 /* Debugging support. Takes the order_stats through a pointer-to-const and returns nothing; presumably prints its state (confirm against the implementation). */
104 void order_stats_dump (const struct order_stats *);