1 /* PSPP - a program for statistical analysis. -*-c-*-
2 Copyright (C) 2010, 2011, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "language/stats/runs.h"
23 #include <gsl/gsl_cdf.h>
26 #include "data/casegrouper.h"
27 #include "data/casereader.h"
28 #include "data/casewriter.h"
29 #include "data/dataset.h"
30 #include "data/dictionary.h"
31 #include "data/format.h"
32 #include "data/subcase.h"
33 #include "data/variable.h"
34 #include "libpspp/message.h"
35 #include "libpspp/misc.h"
36 #include "math/percentiles.h"
37 #include "math/sort.h"
38 #include "output/pivot-table.h"
41 #define N_(msgid) msgid
42 #define _(msgid) gettext (msgid)
47 /* The value used to dichotimise the data */
50 /* The number of cases not less than cutpoint */
53 /* The number of cases less than cutpoint */
56 /* The sum of np and nn */
59 /* The number of runs */
62 /* The sign of the last case seen */
68 /* Return the Z statistic representing the assympototic
69 distribution of the number of runs */
71 runs_statistic (const struct run_state *rs)
75 double mu = 2 * rs->np * rs->nn;
76 mu /= rs->np + rs->nn;
91 sigma = 2 * rs->np * rs->nn;
92 sigma *= 2 * rs->np * rs->nn - rs->nn - rs->np;
93 sigma /= pow2 (rs->np + rs->nn);
94 sigma /= rs->np + rs->nn - 1.0;
102 static void show_runs_result (const struct runs_test *, const struct run_state *, const struct dictionary *);
105 runs_execute (const struct dataset *ds,
106 struct casereader *input,
107 enum mv_class exclude,
108 const struct npar_test *test,
114 const struct dictionary *dict = dataset_dict (ds);
115 const struct variable *weight = dict_get_weight (dict);
117 struct one_sample_test *otp = UP_CAST (test, struct one_sample_test, parent);
118 struct runs_test *rt = UP_CAST (otp, struct runs_test, parent);
119 struct run_state *rs = XCALLOC (otp->n_vars, struct run_state);
125 for (v = 0; v < otp->n_vars; ++v)
127 bool multimodal = false;
128 struct run_state *run = &rs[v];
130 struct casereader *group = NULL;
131 struct casegrouper *grouper;
132 struct casereader *reader = casereader_clone (input);
133 const struct variable *var = otp->vars[v];
135 reader = sort_execute_1var (reader, var);
137 grouper = casegrouper_create_vars (reader, &var, 1);
139 while (casegrouper_get_next_group (grouper, &group))
144 for (; (c = casereader_read (group)); case_unref (c))
146 const double w = weight ? case_data (c, weight)->f: 1.0;
147 const union value *val = case_data (c, var);
148 if (var_is_value_missing (var, val, exclude))
158 else if (cc == last_cc)
161 if (x > run->cutpoint)
165 casereader_destroy (group);
167 casegrouper_destroy (grouper);
169 msg (MW, _("Multiple modes exist for variable `%s'. "
170 "Using %.*g as the threshold value."),
171 var_get_name (var), DBL_DIG + 1, run->cutpoint);
177 for (v = 0; v < otp->n_vars; ++v)
181 struct run_state *run = &rs[v];
182 struct casereader *reader = casereader_clone (input);
183 const struct variable *var = otp->vars[v];
184 struct casewriter *writer;
185 struct percentile *median;
186 struct order_stats *os;
188 subcase_init_var (&sc, var, SC_ASCEND);
189 writer = sort_create_writer (&sc, casereader_get_proto (reader));
191 for (; (c = casereader_read (reader));)
193 const union value *val = case_data (c, var);
194 const double w = weight ? case_data (c, weight)->f: 1.0;
195 if (var_is_value_missing (var, val, exclude))
202 casewriter_write (writer, c);
204 subcase_destroy (&sc);
205 casereader_destroy (reader);
206 reader = casewriter_make_reader (writer);
208 median = percentile_create (0.5, cc);
209 os = &median->parent;
211 order_stats_accumulate (&os, 1,
217 run->cutpoint = percentile_calculate (median, PC_HAVERAGE);
218 statistic_destroy (&median->parent.parent);
224 struct casereader *reader = casereader_clone (input);
225 for (; (c = casereader_read (reader)); case_unref (c))
227 const double w = weight ? case_data (c, weight)->f: 1.0;
228 for (v = 0; v < otp->n_vars; ++v)
230 const struct variable *var = otp->vars[v];
231 const union value *val = case_data (c, var);
232 const double x = val->f;
233 struct run_state *run = &rs[v];
235 if (var_is_value_missing (var, val, exclude))
238 run->cutpoint += x * w;
242 casereader_destroy (reader);
243 for (v = 0; v < otp->n_vars; ++v)
245 struct run_state *run = &rs[v];
246 run->cutpoint /= run->n;
252 for (v = 0; v < otp->n_vars; ++v)
254 struct run_state *run = &rs[v];
255 run->cutpoint = rt->cutpoint;
261 for (; (c = casereader_read (input)); case_unref (c))
263 const double w = weight ? case_data (c, weight)->f: 1.0;
265 for (v = 0; v < otp->n_vars; ++v)
267 struct run_state *run = &rs[v];
268 const struct variable *var = otp->vars[v];
269 const union value *val = case_data (c, var);
271 double d = x - run->cutpoint;
274 if (var_is_value_missing (var, val, exclude))
288 if (sign != run->last_sign)
291 run->last_sign = sign;
294 casereader_destroy (input);
296 for (v = 0; v < otp->n_vars; ++v)
298 struct run_state *run = &rs[v];
299 run->n = run->np + run->nn;
302 show_runs_result (rt, rs, dict);
310 show_runs_result (const struct runs_test *rt, const struct run_state *rs, const struct dictionary *dict)
312 const struct one_sample_test *otp = &rt->parent;
314 struct pivot_table *table = pivot_table_create (N_("Runs Test"));
315 pivot_table_set_weight_var (table, dict_get_weight (dict));
317 pivot_dimension_create (
318 table, PIVOT_AXIS_ROW, N_("Statistics"),
319 (rt->cp_mode == CP_CUSTOM ? N_("Test Value")
320 : rt->cp_mode == CP_MODE ? N_("Test Value (mode)")
321 : rt->cp_mode == CP_MEAN ? N_("Test Value (mean)")
322 : N_("Test Value (median)")), PIVOT_RC_OTHER,
323 N_("Cases < Test Value"), PIVOT_RC_COUNT,
324 N_("Cases ≥ Test Value"), PIVOT_RC_COUNT,
325 N_("Total Cases"), PIVOT_RC_COUNT,
326 N_("Number of Runs"), PIVOT_RC_INTEGER,
327 N_("Z"), PIVOT_RC_OTHER,
328 N_("Asymp. Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE);
330 struct pivot_dimension *variables = pivot_dimension_create (
331 table, PIVOT_AXIS_COLUMN, N_("Variable"));
333 for (size_t i = 0 ; i < otp->n_vars; ++i)
335 const struct run_state *run = &rs[i];
337 int col = pivot_category_create_leaf (
338 variables->root, pivot_value_new_variable (otp->vars[i]));
340 double z = runs_statistic (run);
349 2.0 * (1.0 - gsl_cdf_ugaussian_P (fabs (z))),
352 for (int row = 0; row < sizeof rows / sizeof *rows; row++)
353 pivot_table_put2 (table, row, col, pivot_value_new_number (rows[row]));
356 pivot_table_submit (table);