1 /* PSPP - a program for statistical analysis. -*-c-*-
2 Copyright (C) 2010, 2011, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "language/stats/runs.h"
23 #include <gsl/gsl_cdf.h>
26 #include "data/casegrouper.h"
27 #include "data/casereader.h"
28 #include "data/casewriter.h"
29 #include "data/dataset.h"
30 #include "data/dictionary.h"
31 #include "data/format.h"
32 #include "data/subcase.h"
33 #include "data/variable.h"
34 #include "libpspp/message.h"
35 #include "libpspp/misc.h"
36 #include "math/percentiles.h"
37 #include "math/sort.h"
38 #include "output/tab.h"
41 #define _(msgid) gettext (msgid)
46 /* The value used to dichotimise the data */
49 /* The number of cases not less than cutpoint */
52 /* The number of cases less than cutpoint */
55 /* The sum of np and nn */
58 /* The number of runs */
61 /* The sign of the last case seen */
67 /* Return the Z statistic representing the assympototic
68 distribution of the number of runs */
70 runs_statistic (const struct run_state *rs)
74 double mu = 2 * rs->np * rs->nn;
75 mu /= rs->np + rs->nn;
90 sigma = 2 * rs->np * rs->nn;
91 sigma *= 2 * rs->np * rs->nn - rs->nn - rs->np;
92 sigma /= pow2 (rs->np + rs->nn);
93 sigma /= rs->np + rs->nn - 1.0;
101 static void show_runs_result (const struct runs_test *, const struct run_state *, const struct dictionary *);
104 runs_execute (const struct dataset *ds,
105 struct casereader *input,
106 enum mv_class exclude,
107 const struct npar_test *test,
113 const struct dictionary *dict = dataset_dict (ds);
114 const struct variable *weight = dict_get_weight (dict);
116 struct one_sample_test *otp = UP_CAST (test, struct one_sample_test, parent);
117 struct runs_test *rt = UP_CAST (otp, struct runs_test, parent);
118 struct run_state *rs = xcalloc (otp->n_vars, sizeof (*rs));
120 switch ( rt->cp_mode)
124 for (v = 0; v < otp->n_vars; ++v)
126 bool multimodal = false;
127 struct run_state *run = &rs[v];
129 struct casereader *group = NULL;
130 struct casegrouper *grouper;
131 struct casereader *reader = casereader_clone (input);
132 const struct variable *var = otp->vars[v];
134 reader = sort_execute_1var (reader, var);
136 grouper = casegrouper_create_vars (reader, &var, 1);
138 while (casegrouper_get_next_group (grouper, &group))
143 for (; (c = casereader_read (group)); case_unref (c))
145 const double w = weight ? case_data (c, weight)->f: 1.0;
146 const union value *val = case_data (c, var);
147 if ( var_is_value_missing (var, val, exclude))
157 else if ( cc == last_cc)
160 if ( x > run->cutpoint)
164 casereader_destroy (group);
166 casegrouper_destroy (grouper);
168 msg (MW, _("Multiple modes exist for variable `%s'. "
169 "Using %.*g as the threshold value."),
170 var_get_name (var), DBL_DIG + 1, run->cutpoint);
176 for (v = 0; v < otp->n_vars; ++v)
180 struct run_state *run = &rs[v];
181 struct casereader *reader = casereader_clone (input);
182 const struct variable *var = otp->vars[v];
183 struct casewriter *writer;
184 struct percentile *median;
185 struct order_stats *os;
187 subcase_init_var (&sc, var, SC_ASCEND);
188 writer = sort_create_writer (&sc, casereader_get_proto (reader));
190 for (; (c = casereader_read (reader)); )
192 const union value *val = case_data (c, var);
193 const double w = weight ? case_data (c, weight)->f: 1.0;
194 if ( var_is_value_missing (var, val, exclude))
201 casewriter_write (writer, c);
203 subcase_destroy (&sc);
204 casereader_destroy (reader);
205 reader = casewriter_make_reader (writer);
207 median = percentile_create (0.5, cc);
208 os = &median->parent;
210 order_stats_accumulate (&os, 1,
216 run->cutpoint = percentile_calculate (median, PC_HAVERAGE);
217 statistic_destroy (&median->parent.parent);
223 struct casereader *reader = casereader_clone (input);
224 for (; (c = casereader_read (reader)); case_unref (c))
226 const double w = weight ? case_data (c, weight)->f: 1.0;
227 for (v = 0; v < otp->n_vars; ++v)
229 const struct variable *var = otp->vars[v];
230 const union value *val = case_data (c, var);
231 const double x = val->f;
232 struct run_state *run = &rs[v];
234 if ( var_is_value_missing (var, val, exclude))
237 run->cutpoint += x * w;
241 casereader_destroy (reader);
242 for (v = 0; v < otp->n_vars; ++v)
244 struct run_state *run = &rs[v];
245 run->cutpoint /= run->n;
251 for (v = 0; v < otp->n_vars; ++v)
253 struct run_state *run = &rs[v];
254 run->cutpoint = rt->cutpoint;
260 for (; (c = casereader_read (input)); case_unref (c))
262 const double w = weight ? case_data (c, weight)->f: 1.0;
264 for (v = 0; v < otp->n_vars; ++v)
266 struct run_state *run = &rs[v];
267 const struct variable *var = otp->vars[v];
268 const union value *val = case_data (c, var);
270 double d = x - run->cutpoint;
273 if ( var_is_value_missing (var, val, exclude))
287 if (sign != run->last_sign)
290 run->last_sign = sign;
293 casereader_destroy (input);
295 for (v = 0; v < otp->n_vars; ++v)
297 struct run_state *run = &rs[v];
298 run->n = run->np + run->nn;
301 show_runs_result (rt, rs, dict);
309 show_runs_result (const struct runs_test *rt, const struct run_state *rs, const struct dictionary *dict)
311 const struct variable *weight = dict_get_weight (dict);
312 const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
314 const struct one_sample_test *otp = &rt->parent;
317 const int row_headers = 1;
318 const int column_headers = 1;
319 struct tab_table *table =
320 tab_create (row_headers + otp->n_vars, column_headers + 7);
321 tab_set_format (table, RC_WEIGHT, wfmt);
323 tab_headers (table, row_headers, 0, column_headers, 0);
325 tab_title (table, _("Runs Test"));
327 /* Box around the table and vertical lines inside*/
328 tab_box (table, TAL_2, TAL_2, -1, TAL_1,
329 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
331 tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
332 tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
334 for (i = 0 ; i < otp->n_vars; ++i)
336 const struct run_state *run = &rs[i];
338 double z = runs_statistic (run);
340 tab_text (table, row_headers + i, 0,
341 TAT_TITLE | TAB_CENTER ,
342 var_to_string (otp->vars[i]));
344 tab_double (table, row_headers +i, 1, 0,
345 run->cutpoint, NULL, RC_OTHER);
347 tab_double (table, row_headers +i, 2, 0,
348 run->nn, NULL, RC_WEIGHT);
350 tab_double (table, row_headers +i, 3, 0,
351 run->np, NULL, RC_WEIGHT);
353 tab_double (table, row_headers +i, 4, 0,
354 run->n, NULL, RC_WEIGHT);
356 tab_double (table, row_headers +i, 5, 0,
357 run->runs, NULL, RC_INTEGER);
359 tab_double (table, row_headers +i, 6, 0,
362 tab_double (table, row_headers +i, 7, 0,
363 2.0 * (1.0 - gsl_cdf_ugaussian_P (z)), NULL, RC_PVALUE);
366 switch ( rt->cp_mode)
369 tab_text (table, 0, column_headers ,
370 TAT_TITLE | TAB_LEFT , _("Test Value"));
373 tab_text (table, 0, column_headers ,
374 TAT_TITLE | TAB_LEFT , _("Test Value (mode)"));
377 tab_text (table, 0, column_headers ,
378 TAT_TITLE | TAB_LEFT , _("Test Value (mean)"));
381 tab_text (table, 0, column_headers ,
382 TAT_TITLE | TAB_LEFT , _("Test Value (median)"));
386 tab_text (table, 0, column_headers + 1,
387 TAT_TITLE | TAB_LEFT , _("Cases < Test Value"));
389 tab_text (table, 0, column_headers + 2,
390 TAT_TITLE | TAB_LEFT , _("Cases ≥ Test Value"));
392 tab_text (table, 0, column_headers + 3,
393 TAT_TITLE | TAB_LEFT , _("Total Cases"));
395 tab_text (table, 0, column_headers + 4,
396 TAT_TITLE | TAB_LEFT , _("Number of Runs"));
398 tab_text (table, 0, column_headers + 5,
399 TAT_TITLE | TAB_LEFT , _("Z"));
401 tab_text (table, 0, column_headers + 6,
402 TAT_TITLE | TAB_LEFT , _("Asymp. Sig. (2-tailed)"));