1 /* PSPP - a program for statistical analysis. -*-c-*-
2 Copyright (C) 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include <gsl/gsl_cdf.h>
25 #include <data/format.h>
27 #include <libpspp/misc.h>
28 #include <libpspp/message.h>
29 #include <data/procedure.h>
30 #include <data/casereader.h>
31 #include <data/casewriter.h>
32 #include <data/casegrouper.h>
33 #include <data/dictionary.h>
34 #include <data/subcase.h>
35 #include <data/variable.h>
36 #include <math/percentiles.h>
37 #include <math/sort.h>
41 #define _(msgid) gettext (msgid)
46 /* The value used to dichotimise the data */
49 /* The number of cases not less than cutpoint */
52 /* The number of cases less than cutpoint */
55 /* The sum of np and nn */
58 /* The number of runs */
61 /* The sign of the last case seen */
67 /* Return the Z statistic representing the assympototic
68 distribution of the the number of runs */
70 runs_statistic (const struct run_state *rs)
74 double mu = 2 * rs->np * rs->nn;
75 mu /= rs->np + rs->nn;
90 sigma = 2 * rs->np * rs->nn;
91 sigma *= 2 * rs->np * rs->nn - rs->nn - rs->np;
92 sigma /= pow2 (rs->np + rs->nn);
93 sigma /= rs->np + rs->nn - 1.0;
101 static void show_runs_result (const struct runs_test *, const struct run_state *, const struct dictionary *);
104 runs_execute (const struct dataset *ds,
105 struct casereader *input,
106 enum mv_class exclude,
107 const struct npar_test *test,
113 const struct dictionary *dict = dataset_dict (ds);
114 const struct variable *weight = dict_get_weight (dict);
116 struct one_sample_test *otp = UP_CAST (test, struct one_sample_test, parent);
117 struct runs_test *rt = UP_CAST (otp, struct runs_test, parent);
118 struct run_state *rs = xcalloc (otp->n_vars, sizeof (*rs));
120 switch ( rt->cp_mode)
124 for (v = 0; v < otp->n_vars; ++v)
126 bool multimodal = false;
127 struct run_state *run = &rs[v];
129 struct casereader *group = NULL;
130 struct casegrouper *grouper;
131 struct casereader *reader = casereader_clone (input);
132 const struct variable *var = otp->vars[v];
134 reader = sort_execute_1var (reader, var);
136 grouper = casegrouper_create_vars (reader, &var, 1);
138 while (casegrouper_get_next_group (grouper, &group))
143 for (; (c = casereader_read (group)); case_unref (c))
145 const double w = weight ? case_data (c, weight)->f: 1.0;
146 const union value *val = case_data (c, var);
147 if ( var_is_value_missing (var, val, exclude))
157 else if ( cc == last_cc)
160 if ( x > run->cutpoint)
164 casereader_destroy (group);
166 casegrouper_destroy (grouper);
168 msg (MW, _("Multiple modes exist for varible `%s'. Using %g as the threshold value."),
169 var_get_name (var), run->cutpoint);
175 for (v = 0; v < otp->n_vars; ++v)
179 struct run_state *run = &rs[v];
180 struct casereader *reader = casereader_clone (input);
181 const struct variable *var = otp->vars[v];
182 struct casewriter *writer;
183 struct percentile *median;
184 struct order_stats *os;
186 subcase_init_var (&sc, var, SC_ASCEND);
187 writer = sort_create_writer (&sc, casereader_get_proto (reader));
189 for (; (c = casereader_read (reader)); )
191 const union value *val = case_data (c, var);
192 const double w = weight ? case_data (c, weight)->f: 1.0;
193 if ( var_is_value_missing (var, val, exclude))
200 casewriter_write (writer, c);
202 subcase_destroy (&sc);
203 casereader_destroy (reader);
204 reader = casewriter_make_reader (writer);
206 median = percentile_create (0.5, cc);
207 os = &median->parent;
209 order_stats_accumulate (&os, 1,
215 run->cutpoint = percentile_calculate (median, PC_HAVERAGE);
216 statistic_destroy (&median->parent.parent);
222 struct casereader *reader = casereader_clone (input);
223 for (; (c = casereader_read (reader)); case_unref (c))
225 const double w = weight ? case_data (c, weight)->f: 1.0;
226 for (v = 0; v < otp->n_vars; ++v)
228 const struct variable *var = otp->vars[v];
229 const union value *val = case_data (c, var);
230 const double x = val->f;
231 struct run_state *run = &rs[v];
233 if ( var_is_value_missing (var, val, exclude))
236 run->cutpoint += x * w;
240 casereader_destroy (reader);
241 for (v = 0; v < otp->n_vars; ++v)
243 struct run_state *run = &rs[v];
244 run->cutpoint /= run->n;
250 for (v = 0; v < otp->n_vars; ++v)
252 struct run_state *run = &rs[v];
253 run->cutpoint = rt->cutpoint;
259 for (; (c = casereader_read (input)); case_unref (c))
261 const double w = weight ? case_data (c, weight)->f: 1.0;
263 for (v = 0; v < otp->n_vars; ++v)
265 struct run_state *run = &rs[v];
266 const struct variable *var = otp->vars[v];
267 const union value *val = case_data (c, var);
269 double d = x - run->cutpoint;
272 if ( var_is_value_missing (var, val, exclude))
286 if (sign != run->last_sign)
289 run->last_sign = sign;
292 casereader_destroy (input);
294 for (v = 0; v < otp->n_vars; ++v)
296 struct run_state *run = &rs[v];
297 run->n = run->np + run->nn;
300 show_runs_result (rt, rs, dict);
306 #include <output/tab.h>
309 show_runs_result (const struct runs_test *rt, const struct run_state *rs, const struct dictionary *dict)
311 const struct variable *weight = dict_get_weight (dict);
312 const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
314 const struct one_sample_test *otp = &rt->parent;
317 const int row_headers = 1;
318 const int column_headers = 1;
319 struct tab_table *table =
320 tab_create (row_headers + otp->n_vars, column_headers + 7);
322 tab_headers (table, row_headers, 0, column_headers, 0);
324 tab_title (table, _("Runs Test"));
326 /* Box around the table and vertical lines inside*/
327 tab_box (table, TAL_2, TAL_2, -1, TAL_1,
328 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
330 tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
331 tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
333 for (i = 0 ; i < otp->n_vars; ++i)
335 const struct run_state *run = &rs[i];
337 double z = runs_statistic (run);
339 tab_text (table, row_headers + i, 0,
340 TAT_TITLE | TAB_CENTER ,
341 var_to_string (otp->vars[i]));
343 tab_double (table, row_headers +i, 1, 0,
346 tab_double (table, row_headers +i, 2, 0,
349 tab_double (table, row_headers +i, 3, 0,
352 tab_double (table, row_headers +i, 4, 0,
355 tab_double (table, row_headers +i, 5, 0,
358 tab_double (table, row_headers +i, 6, 0,
361 tab_double (table, row_headers +i, 7, 0,
362 2.0 * gsl_cdf_ugaussian_P (z), 0);
365 switch ( rt->cp_mode)
368 tab_text (table, 0, column_headers ,
369 TAT_TITLE | TAB_LEFT , _("Test Value"));
372 tab_text (table, 0, column_headers ,
373 TAT_TITLE | TAB_LEFT , _("Test Value (mode)"));
376 tab_text (table, 0, column_headers ,
377 TAT_TITLE | TAB_LEFT , _("Test Value (mean)"));
380 tab_text (table, 0, column_headers ,
381 TAT_TITLE | TAB_LEFT , _("Test Value (median)"));
385 tab_text (table, 0, column_headers + 1,
386 TAT_TITLE | TAB_LEFT , _("Cases < Test Value"));
388 tab_text (table, 0, column_headers + 2,
389 TAT_TITLE | TAB_LEFT , _("Cases >= Test Value"));
391 tab_text (table, 0, column_headers + 3,
392 TAT_TITLE | TAB_LEFT , _("Total Cases"));
394 tab_text (table, 0, column_headers + 4,
395 TAT_TITLE | TAB_LEFT , _("Number of Runs"));
397 tab_text (table, 0, column_headers + 5,
398 TAT_TITLE | TAB_LEFT , _("Z"));
400 tab_text (table, 0, column_headers + 6,
401 TAT_TITLE | TAB_LEFT , _("Asymp. Sig. (2-tailed)"));