/* PSPP - a program for statistical analysis.
- Copyright (C) 2008 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
-#include "order-stats.h"
-#include <libpspp/assertion.h>
-#include <data/val-type.h>
-#include <gl/xalloc.h>
-#include <data/variable.h>
-#include <data/casereader.h>
+
+#include "math/order-stats.h"
+
+#include <math.h>
#include <string.h>
+#include "data/casereader.h"
+#include "data/val-type.h"
+#include "data/variable.h"
+#include "libpspp/assertion.h"
+
+#include "gl/xalloc.h"
+
#if 0
#include <stdio.h>
#endif
-static void
-update_k_lower (struct k *kk,
- double y_i, double c_i, double cc_i)
-{
- if ( cc_i <= kk->tc )
- {
- kk->cc = cc_i;
- kk->c = c_i;
- kk->y = y_i;
- }
-}
-
-
-static void
-update_k_upper (struct k *kk,
- double y_i, double c_i, double cc_i)
-{
- if ( cc_i > kk->tc && kk->c_p1 == 0)
- {
- kk->cc_p1 = cc_i;
- kk->c_p1 = c_i;
- kk->y_p1 = y_i;
- }
-}
-
-
static void
update_k_values (const struct ccase *cx, double y_i, double c_i, double cc_i,
struct order_stats **os, size_t n_os)
{
- int j;
-
- for (j = 0 ; j < n_os ; ++j)
+ for (size_t j = 0; j < n_os; ++j)
{
- int k;
struct order_stats *tos = os[j];
- struct statistic *stat = (struct statistic *) tos;
- for (k = 0 ; k < tos->n_k; ++k)
+ struct statistic *stat = &tos->parent;
+
+ for (struct k *k = tos->k; k < &tos->k[tos->n_k]; ++k)
{
- struct k *myk = &tos->k[k];
- update_k_lower (myk, y_i, c_i, cc_i);
- update_k_upper (myk, y_i, c_i, cc_i);
+ /* Update 'k' lower values. */
+ if (cc_i <= k->tc)
+ {
+ k->cc = cc_i;
+ k->c = c_i;
+ k->y = y_i;
+ }
+
+ /* Update 'k' upper values. */
+ if (cc_i > k->tc && k->c_p1 == 0)
+ {
+ k->cc_p1 = cc_i;
+ k->c_p1 = c_i;
+ k->y_p1 = y_i;
+ }
}
- if ( stat->accumulate )
- stat->accumulate (stat, cx, c_i, cc_i, y_i);
-
- tos->cc = cc_i;
+ if (tos->accumulate)
+ tos->accumulate (stat, cx, c_i, cc_i, y_i);
}
}
+/* Reads all the cases from READER and accumulates their data into the N_OS
+ order statistics in OS, taking data from case index DATA_IDX and weights
+ from case index WEIGHT_IDX. WEIGHT_IDX may be -1 to assume weight 1.
+
+ This function must be used only once per order_stats.
+ Takes ownership of READER.
+
+ Data values must be numeric and sorted in ascending order. Use
+ sort_execute_1var() or related functions to sort unsorted data before
+ passing it to this function. */
void
-order_stats_accumulate (struct order_stats **os, size_t nos,
- struct casereader *reader,
- const struct variable *wv,
- const struct variable *var,
- enum mv_class exclude)
+order_stats_accumulate_idx (struct order_stats **os, size_t n_os,
+ struct casereader *reader,
+ int weight_idx, int data_idx)
{
- struct ccase cx;
- struct ccase prev_cx;
+ struct ccase *cx;
+ struct ccase *prev_cx = NULL;
double prev_value = -DBL_MAX;
double cc_i = 0;
double c_i = 0;
- case_nullify (&prev_cx);
-
- for (; casereader_read (reader, &cx); case_destroy (&cx))
+ for (; (cx = casereader_read (reader)) != NULL; case_unref (cx))
{
- const double weight = wv ? case_data (&cx, wv)->f : 1.0;
- const double this_value = case_data (&cx, var)->f;
-
- /* The casereader MUST be sorted */
- assert (this_value >= prev_value);
-
- if ( var_is_value_missing (var, case_data (&cx, var), exclude))
- continue;
-
- case_destroy (&prev_cx);
-
- if ( prev_value == -DBL_MAX || prev_value == this_value)
- c_i += weight;
-
- if ( prev_value > -DBL_MAX && this_value > prev_value)
- {
- update_k_values (&prev_cx, prev_value, c_i, cc_i, os, nos);
- c_i = weight;
- }
+ const double weight = weight_idx == -1 ? 1.0 : case_num_idx (cx, weight_idx);
+ if (weight == SYSMIS || weight <= 0)
+ continue;
+
+ const double this_value = case_num_idx (cx, data_idx);
+ if (!isfinite (this_value) || this_value == SYSMIS)
+ continue;
+
+ if (!prev_cx || this_value > prev_value)
+ {
+ if (prev_cx)
+ update_k_values (prev_cx, prev_value, c_i, cc_i, os, n_os);
+ prev_value = this_value;
+ c_i = weight;
+ }
+ else
+ {
+ /* Data values must be sorted. */
+ assert (this_value == prev_value);
+
+ c_i += weight;
+ }
cc_i += weight;
- prev_value = this_value;
- case_clone (&prev_cx, &cx);
+ case_unref (prev_cx);
+ prev_cx = case_ref (cx);
+ }
+ if (prev_cx)
+ {
+ update_k_values (prev_cx, prev_value, c_i, cc_i, os, n_os);
+ case_unref (prev_cx);
}
-
- update_k_values (&prev_cx, prev_value, c_i, cc_i, os, nos);
- case_destroy (&prev_cx);
casereader_destroy (reader);
}
+/* Reads all the cases from READER and accumulates their data into the N_OS
+ order statistics in OS, taking data from DATA_VAR and weights from
+ WEIGHT_VAR. Drops cases for which the value of DATA_VAR is missing
+ according to EXCLUDE. WEIGHT_VAR may be NULL to assume weight 1.
+
+ This function must be used only once per order_stats.
+
+ Takes ownership of READER.
+ DATA_VAR must be numeric and sorted in ascending order. Use
+ sort_execute_1var() or related functions to sort unsorted data before
+ passing it to this function. */
+void
+order_stats_accumulate (struct order_stats **os, size_t n_os,
+ struct casereader *reader,
+ const struct variable *weight_var,
+ const struct variable *data_var,
+ enum mv_class exclude)
+{
+ reader = casereader_create_filter_missing (reader, &data_var, 1,
+ exclude, NULL, NULL);
+ order_stats_accumulate_idx (os, n_os, reader,
+ weight_var ? var_get_case_index (weight_var) : -1,
+ var_get_case_index (data_var));
+}