1 /* PSPP - a program for statistical analysis. -*-c-*-
2 Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "language/stats/runs.h"
22 #include <gsl/gsl_cdf.h>
25 #include "data/casegrouper.h"
26 #include "data/casereader.h"
27 #include "data/casewriter.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/format.h"
31 #include "data/subcase.h"
32 #include "data/variable.h"
33 #include "libpspp/message.h"
34 #include "libpspp/misc.h"
35 #include "math/percentiles.h"
36 #include "math/sort.h"
37 #include "output/tab.h"
40 #define _(msgid) gettext (msgid)
45 /* The value used to dichotimise the data */
48 /* The number of cases not less than cutpoint */
51 /* The number of cases less than cutpoint */
54 /* The sum of np and nn */
57 /* The number of runs */
60 /* The sign of the last case seen */
66 /* Return the Z statistic representing the assympototic
67 distribution of the number of runs */
69 runs_statistic (const struct run_state *rs)
73 double mu = 2 * rs->np * rs->nn;
74 mu /= rs->np + rs->nn;
89 sigma = 2 * rs->np * rs->nn;
90 sigma *= 2 * rs->np * rs->nn - rs->nn - rs->np;
91 sigma /= pow2 (rs->np + rs->nn);
92 sigma /= rs->np + rs->nn - 1.0;
100 static void show_runs_result (const struct runs_test *, const struct run_state *, const struct dictionary *);
103 runs_execute (const struct dataset *ds,
104 struct casereader *input,
105 enum mv_class exclude,
106 const struct npar_test *test,
112 const struct dictionary *dict = dataset_dict (ds);
113 const struct variable *weight = dict_get_weight (dict);
115 struct one_sample_test *otp = UP_CAST (test, struct one_sample_test, parent);
116 struct runs_test *rt = UP_CAST (otp, struct runs_test, parent);
117 struct run_state *rs = xcalloc (otp->n_vars, sizeof (*rs));
119 switch ( rt->cp_mode)
123 for (v = 0; v < otp->n_vars; ++v)
125 bool multimodal = false;
126 struct run_state *run = &rs[v];
128 struct casereader *group = NULL;
129 struct casegrouper *grouper;
130 struct casereader *reader = casereader_clone (input);
131 const struct variable *var = otp->vars[v];
133 reader = sort_execute_1var (reader, var);
135 grouper = casegrouper_create_vars (reader, &var, 1);
137 while (casegrouper_get_next_group (grouper, &group))
142 for (; (c = casereader_read (group)); case_unref (c))
144 const double w = weight ? case_data (c, weight)->f: 1.0;
145 const union value *val = case_data (c, var);
146 if ( var_is_value_missing (var, val, exclude))
156 else if ( cc == last_cc)
159 if ( x > run->cutpoint)
163 casereader_destroy (group);
165 casegrouper_destroy (grouper);
167 msg (MW, _("Multiple modes exist for variable `%s'. Using %g as the threshold value."),
168 var_get_name (var), run->cutpoint);
174 for (v = 0; v < otp->n_vars; ++v)
178 struct run_state *run = &rs[v];
179 struct casereader *reader = casereader_clone (input);
180 const struct variable *var = otp->vars[v];
181 struct casewriter *writer;
182 struct percentile *median;
183 struct order_stats *os;
185 subcase_init_var (&sc, var, SC_ASCEND);
186 writer = sort_create_writer (&sc, casereader_get_proto (reader));
188 for (; (c = casereader_read (reader)); )
190 const union value *val = case_data (c, var);
191 const double w = weight ? case_data (c, weight)->f: 1.0;
192 if ( var_is_value_missing (var, val, exclude))
199 casewriter_write (writer, c);
201 subcase_destroy (&sc);
202 casereader_destroy (reader);
203 reader = casewriter_make_reader (writer);
205 median = percentile_create (0.5, cc);
206 os = &median->parent;
208 order_stats_accumulate (&os, 1,
214 run->cutpoint = percentile_calculate (median, PC_HAVERAGE);
215 statistic_destroy (&median->parent.parent);
221 struct casereader *reader = casereader_clone (input);
222 for (; (c = casereader_read (reader)); case_unref (c))
224 const double w = weight ? case_data (c, weight)->f: 1.0;
225 for (v = 0; v < otp->n_vars; ++v)
227 const struct variable *var = otp->vars[v];
228 const union value *val = case_data (c, var);
229 const double x = val->f;
230 struct run_state *run = &rs[v];
232 if ( var_is_value_missing (var, val, exclude))
235 run->cutpoint += x * w;
239 casereader_destroy (reader);
240 for (v = 0; v < otp->n_vars; ++v)
242 struct run_state *run = &rs[v];
243 run->cutpoint /= run->n;
249 for (v = 0; v < otp->n_vars; ++v)
251 struct run_state *run = &rs[v];
252 run->cutpoint = rt->cutpoint;
258 for (; (c = casereader_read (input)); case_unref (c))
260 const double w = weight ? case_data (c, weight)->f: 1.0;
262 for (v = 0; v < otp->n_vars; ++v)
264 struct run_state *run = &rs[v];
265 const struct variable *var = otp->vars[v];
266 const union value *val = case_data (c, var);
268 double d = x - run->cutpoint;
271 if ( var_is_value_missing (var, val, exclude))
285 if (sign != run->last_sign)
288 run->last_sign = sign;
291 casereader_destroy (input);
293 for (v = 0; v < otp->n_vars; ++v)
295 struct run_state *run = &rs[v];
296 run->n = run->np + run->nn;
299 show_runs_result (rt, rs, dict);
307 show_runs_result (const struct runs_test *rt, const struct run_state *rs, const struct dictionary *dict)
309 const struct variable *weight = dict_get_weight (dict);
310 const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
312 const struct one_sample_test *otp = &rt->parent;
315 const int row_headers = 1;
316 const int column_headers = 1;
317 struct tab_table *table =
318 tab_create (row_headers + otp->n_vars, column_headers + 7);
320 tab_headers (table, row_headers, 0, column_headers, 0);
322 tab_title (table, _("Runs Test"));
324 /* Box around the table and vertical lines inside*/
325 tab_box (table, TAL_2, TAL_2, -1, TAL_1,
326 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
328 tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
329 tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
331 for (i = 0 ; i < otp->n_vars; ++i)
333 const struct run_state *run = &rs[i];
335 double z = runs_statistic (run);
337 tab_text (table, row_headers + i, 0,
338 TAT_TITLE | TAB_CENTER ,
339 var_to_string (otp->vars[i]));
341 tab_double (table, row_headers +i, 1, 0,
344 tab_double (table, row_headers +i, 2, 0,
347 tab_double (table, row_headers +i, 3, 0,
350 tab_double (table, row_headers +i, 4, 0,
353 tab_double (table, row_headers +i, 5, 0,
356 tab_double (table, row_headers +i, 6, 0,
359 tab_double (table, row_headers +i, 7, 0,
360 2.0 * gsl_cdf_ugaussian_P (z), 0);
363 switch ( rt->cp_mode)
366 tab_text (table, 0, column_headers ,
367 TAT_TITLE | TAB_LEFT , _("Test Value"));
370 tab_text (table, 0, column_headers ,
371 TAT_TITLE | TAB_LEFT , _("Test Value (mode)"));
374 tab_text (table, 0, column_headers ,
375 TAT_TITLE | TAB_LEFT , _("Test Value (mean)"));
378 tab_text (table, 0, column_headers ,
379 TAT_TITLE | TAB_LEFT , _("Test Value (median)"));
383 tab_text (table, 0, column_headers + 1,
384 TAT_TITLE | TAB_LEFT , _("Cases < Test Value"));
386 tab_text (table, 0, column_headers + 2,
387 TAT_TITLE | TAB_LEFT , _("Cases ≥ Test Value"));
389 tab_text (table, 0, column_headers + 3,
390 TAT_TITLE | TAB_LEFT , _("Total Cases"));
392 tab_text (table, 0, column_headers + 4,
393 TAT_TITLE | TAB_LEFT , _("Number of Runs"));
395 tab_text (table, 0, column_headers + 5,
396 TAT_TITLE | TAB_LEFT , _("Z"));
398 tab_text (table, 0, column_headers + 6,
399 TAT_TITLE | TAB_LEFT , _("Asymp. Sig. (2-tailed)"));