1 /* PSPP - a program for statistical analysis. -*-c-*-
2 Copyright (C) 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include <gsl/gsl_cdf.h>
25 #include <data/format.h>
27 #include <libpspp/misc.h>
28 #include <libpspp/message.h>
29 #include <data/procedure.h>
30 #include <data/casereader.h>
31 #include <data/casewriter.h>
32 #include <data/casegrouper.h>
33 #include <data/dictionary.h>
34 #include <data/subcase.h>
35 #include <data/variable.h>
36 #include <math/percentiles.h>
37 #include <math/sort.h>
41 #define _(msgid) gettext (msgid)
46 /* The value used to dichotimise the data */
49 /* The number of cases not less than cutpoint */
52 /* The number of cases less than cutpoint */
55 /* The sum of np and nn */
58 /* The number of runs */
61 /* The sign of the last case seen */
67 /* Return the Z statistic representing the assympototic
68 distribution of the the number of runs */
70 runs_statistic (const struct run_state *rs)
74 double mu = 2 * rs->np * rs->nn;
75 mu /= rs->np + rs->nn;
90 sigma = 2 * rs->np * rs->nn;
91 sigma *= 2 * rs->np * rs->nn - rs->nn - rs->np;
92 sigma /= pow2 (rs->np + rs->nn);
93 sigma /= rs->np + rs->nn - 1.0;
101 static void show_runs_result (const struct runs_test *, const struct run_state *, const struct dictionary *);
104 runs_execute (const struct dataset *ds,
105 struct casereader *input,
106 enum mv_class exclude,
107 const struct npar_test *test,
113 const struct dictionary *dict = dataset_dict (ds);
114 const struct variable *weight = dict_get_weight (dict);
116 struct one_sample_test *otp = UP_CAST (test, struct one_sample_test, parent);
117 struct runs_test *rt = UP_CAST (otp, struct runs_test, parent);
118 struct run_state *rs = xcalloc (otp->n_vars, sizeof (*rs));
120 switch ( rt->cp_mode)
124 for (v = 0; v < otp->n_vars; ++v)
126 bool multimodal = false;
127 struct run_state *run = &rs[v];
129 struct casereader *group = NULL;
130 struct casegrouper *grouper;
131 struct casereader *reader = casereader_clone (input);
132 const struct variable *var = otp->vars[v];
134 reader = sort_execute_1var (reader, var);
136 grouper = casegrouper_create_vars (reader, &var, 1);
138 while (casegrouper_get_next_group (grouper, &group))
143 for (; (c = casereader_read (group)); case_unref (c))
145 const double w = weight ? case_data (c, weight)->f: 1.0;
146 const union value *val = case_data (c, var);
147 if ( var_is_value_missing (var, val, exclude))
157 else if ( cc == last_cc)
160 if ( x > run->cutpoint)
164 casereader_destroy (group);
166 casegrouper_destroy (grouper);
168 msg (MW, _("Multiple modes exist for varible `%s'. Using %g as the threshold value."),
169 var_get_name (var), run->cutpoint);
175 for (v = 0; v < otp->n_vars; ++v)
179 struct run_state *run = &rs[v];
180 struct casereader *reader = casereader_clone (input);
181 const struct variable *var = otp->vars[v];
182 struct casewriter *writer;
183 struct percentile *median;
184 struct order_stats *os;
186 subcase_init_var (&sc, var, SC_ASCEND);
187 writer = sort_create_writer (&sc, casereader_get_proto (reader));
189 for (; (c = casereader_read (reader)); case_unref (c))
191 const union value *val = case_data (c, var);
192 const double w = weight ? case_data (c, weight)->f: 1.0;
193 if ( var_is_value_missing (var, val, exclude))
197 casewriter_write (writer, c);
199 subcase_destroy (&sc);
200 casereader_destroy (reader);
201 reader = casewriter_make_reader (writer);
203 median = percentile_create (0.5, cc);
204 os = &median->parent;
206 order_stats_accumulate (&os, 1,
212 run->cutpoint = percentile_calculate (median, PC_HAVERAGE);
213 statistic_destroy (&median->parent.parent);
219 struct casereader *reader = casereader_clone (input);
220 for (; (c = casereader_read (reader)); case_unref (c))
222 const double w = weight ? case_data (c, weight)->f: 1.0;
223 for (v = 0; v < otp->n_vars; ++v)
225 const struct variable *var = otp->vars[v];
226 const union value *val = case_data (c, var);
227 const double x = val->f;
228 struct run_state *run = &rs[v];
230 if ( var_is_value_missing (var, val, exclude))
233 run->cutpoint += x * w;
237 casereader_destroy (reader);
238 for (v = 0; v < otp->n_vars; ++v)
240 struct run_state *run = &rs[v];
241 run->cutpoint /= run->n;
247 for (v = 0; v < otp->n_vars; ++v)
249 struct run_state *run = &rs[v];
250 run->cutpoint = rt->cutpoint;
256 for (; (c = casereader_read (input)); case_unref (c))
258 const double w = weight ? case_data (c, weight)->f: 1.0;
260 for (v = 0; v < otp->n_vars; ++v)
262 struct run_state *run = &rs[v];
263 const struct variable *var = otp->vars[v];
264 const union value *val = case_data (c, var);
266 double d = x - run->cutpoint;
269 if ( var_is_value_missing (var, val, exclude))
283 if (sign != run->last_sign)
286 run->last_sign = sign;
289 casereader_destroy (input);
291 for (v = 0; v < otp->n_vars; ++v)
293 struct run_state *run = &rs[v];
294 run->n = run->np + run->nn;
297 show_runs_result (rt, rs, dict);
303 #include <output/tab.h>
306 show_runs_result (const struct runs_test *rt, const struct run_state *rs, const struct dictionary *dict)
308 const struct variable *weight = dict_get_weight (dict);
309 const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
311 const struct one_sample_test *otp = &rt->parent;
314 const int row_headers = 1;
315 const int column_headers = 1;
316 struct tab_table *table =
317 tab_create (row_headers + otp->n_vars, column_headers + 7);
319 tab_headers (table, row_headers, 0, column_headers, 0);
321 tab_title (table, _("Runs Test"));
323 /* Box around the table and vertical lines inside*/
324 tab_box (table, TAL_2, TAL_2, -1, TAL_1,
325 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
327 tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
328 tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
330 for (i = 0 ; i < otp->n_vars; ++i)
332 const struct run_state *run = &rs[i];
334 double z = runs_statistic (run);
336 tab_text (table, row_headers + i, 0,
337 TAT_TITLE | TAB_CENTER ,
338 var_to_string (otp->vars[i]));
340 tab_double (table, row_headers +i, 1, 0,
343 tab_double (table, row_headers +i, 2, 0,
346 tab_double (table, row_headers +i, 3, 0,
349 tab_double (table, row_headers +i, 4, 0,
352 tab_double (table, row_headers +i, 5, 0,
355 tab_double (table, row_headers +i, 6, 0,
358 tab_double (table, row_headers +i, 7, 0,
359 2.0 * gsl_cdf_ugaussian_P (z), 0);
362 switch ( rt->cp_mode)
365 tab_text (table, 0, column_headers ,
366 TAT_TITLE | TAB_LEFT , _("Test Value "));
369 tab_text (table, 0, column_headers ,
370 TAT_TITLE | TAB_LEFT , _("Test Value (mode)"));
373 tab_text (table, 0, column_headers ,
374 TAT_TITLE | TAB_LEFT , _("Test Value (mean)"));
377 tab_text (table, 0, column_headers ,
378 TAT_TITLE | TAB_LEFT , _("Test Value (median)"));
382 tab_text (table, 0, column_headers + 1,
383 TAT_TITLE | TAB_LEFT , _("Cases < Test Value"));
385 tab_text (table, 0, column_headers + 2,
386 TAT_TITLE | TAB_LEFT , _("Cases >= Test Value"));
388 tab_text (table, 0, column_headers + 3,
389 TAT_TITLE | TAB_LEFT , _("Total Cases"));
391 tab_text (table, 0, column_headers + 4,
392 TAT_TITLE | TAB_LEFT , _("Number of Runs"));
394 tab_text (table, 0, column_headers + 5,
395 TAT_TITLE | TAB_LEFT , _("Z"));
397 tab_text (table, 0, column_headers + 6,
398 TAT_TITLE | TAB_LEFT , _("Asymp. Sig. (2-tailed)"));