X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fmath%2Forder-stats.h;h=4bb69383fb7944a22b12f0ce3d8328e2d27abfff;hb=499552917fce3a3da9d0fa826e1589a1c62acf27;hp=03011ea32c4116a3045e45a61e532c7dfa57bd87;hpb=46ad74de6bcf18e2a59b208eee18648f27b08272;p=pspp

diff --git a/src/math/order-stats.h b/src/math/order-stats.h
index 03011ea32c..4bb69383fb 100644
--- a/src/math/order-stats.h
+++ b/src/math/order-stats.h
@@ -17,6 +17,39 @@
 #ifndef __ORDER_STATS_H__
 #define __ORDER_STATS_H__
 
+/* Support for order statistics.
+
+   The kth order statistic of a statistical sample is equal to its kth-smallest
+   value.  The minimum is the first order statistic and the maximum is the
+   last.  This code and data structure supplies infrastructure for
+   higher-level statistics that rely on order statistics.  It is a kind of
+   "abstract base class" that is not useful on its own.
+
+   This is implemented here as a kind of "partial frequency table".  The
+   order_stats_accumulate() and order_stats_accumulate_idx() functions
+   effectively generate all of the frequency table entries for the variable,
+   one by one, and pass them to the "accumulate" function, if any.  They can
+   also record pairs of frequency table entries surrounding desired target
+   cumulative weights in 'k' data structures.
+
+   Client use
+   ==========
+
+   The common pattern for clients to use statistics based on order statistics
+   is this:
+
+   - Create the higher-level statistic with, for example, percentile_create().
+
+   - Feed in all the data with order_stats_accumulate() or
+     order_stats_accumulate_idx().  The data must be in sorted order: if
+     necessary, use one of the sorting functions from sort.h to sort them.
+
+   - Obtain the desired results by examining the higher-level data structure or
+     by calling an appropriate function, e.g. percentile_calculate().
+
+   - Destroy the data structure with statistic_destroy().
+*/
+
 #include <stddef.h>
 #include "data/missing-values.h"
 #include "math/statistic.h"
@@ -24,45 +57,50 @@
 struct casereader;
 struct variable;
 
-/*
-  cc <= tc < cc_p1
+/* A pair of adjacent frequency table entries.
+
+   cc <= tc < cc_p1
 */
 struct k
 {
+  /* Target cumulative weight.
+     Set by the client before invoking order_stats_accumulate{,_idx}. */
   double tc;
-  double cc;
-  double cc_p1;
-  double c;
-  double c_p1;
-  double y;
-  double y_p1;
-};
 
+  /* Lower order statistics. */
+  double cc;                    /* Largest cumulative weight <= tc. */
+  double c;                     /* Weight for data values equal to 'y'. */
+  double y;                     /* Data value. */
+
+  /* Upper order statistics. */
+  double cc_p1;                 /* Smallest cumulative weight > tc. */
+  double c_p1;                  /* Weight for data values equal to 'y_p1'. */
+  double y_p1;                  /* Data value. */
+};
 
+/* Order statistics calculation data structure.  See the comment at the top of
+   this file for usage details. */
 struct order_stats
 {
   struct statistic parent;
-  int n_k;
-  struct k *k;
-
-  double cc;
-};
 
-enum mv_class;
-
-void order_stats_dump (const struct order_stats *os);
-
-void
-order_stats_accumulate_idx (struct order_stats **os, size_t nos,
-                            struct casereader *reader,
-                            int wt_idx,
-                            int val_idx);
+  void (*accumulate) (struct statistic *, const struct ccase *, double c, double cc, double y);
 
+  struct k *k;
+  size_t n_k;
+};
 
-void order_stats_accumulate (struct order_stats **ptl, size_t nos,
-			     struct casereader *reader,
-			     const struct variable *wv,
-			     const struct variable *var,
+void order_stats_accumulate_idx (struct order_stats **os, size_t n_os,
+                                 struct casereader *reader,
+                                 int weight_idx,
+                                 int data_idx);
+void order_stats_accumulate (struct order_stats **os, size_t n_os,
+			     struct casereader *,
+			     const struct variable *weight_var,
+			     const struct variable *data_var,
 			     enum mv_class exclude);
 
+/* Debugging support. */
+void order_stats_dump (const struct order_stats *);
+
 #endif