1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include <gsl/gsl_cdf.h>
23 #include "data/format.h"
24 #include "libpspp/cast.h"
26 #include "data/variable.h"
27 #include "data/case.h"
28 #include "data/dictionary.h"
29 #include "data/dataset.h"
30 #include "data/casereader.h"
31 #include "data/casewriter.h"
32 #include "data/subcase.h"
35 #include "data/casereader.h"
36 #include "math/percentiles.h"
38 #include "math/sort.h"
40 #include "libpspp/hmap.h"
41 #include "libpspp/array.h"
42 #include "libpspp/str.h"
43 #include "data/value.h"
44 #include "libpspp/misc.h"
46 #include "output/tab.h"
49 #define _(msgid) gettext (msgid)
54 struct hmap_node node;
62 const struct variable *var;
63 struct val_node **sorted_array;
72 val_node_cmp_3way (const void *a_, const void *b_, const void *aux)
74 const struct variable *indep_var = aux;
75 const struct val_node *const *a = a_;
76 const struct val_node *const *b = b_;
78 return value_compare_3way (&(*a)->val, &(*b)->val, var_get_width (indep_var));
82 show_frequencies (const struct n_sample_test *nst, const struct results *results, int n_vals, const struct dictionary *);
85 show_test_statistics (const struct n_sample_test *nst, const struct results *results, int, const struct dictionary *);
88 static struct val_node *
89 find_value (const struct hmap *map, const union value *val,
90 const struct variable *var)
92 struct val_node *foo = NULL;
93 size_t hash = value_hash (val, var_get_width (var), 0);
94 HMAP_FOR_EACH_WITH_HASH (foo, struct val_node, node, hash, map)
95 if (value_equal (val, &foo->val, var_get_width (var)))
102 median_execute (const struct dataset *ds,
103 struct casereader *input,
104 enum mv_class exclude,
105 const struct npar_test *test,
109 const struct dictionary *dict = dataset_dict (ds);
110 const struct variable *wvar = dict_get_weight (dict);
113 const struct median_test *mt = UP_CAST (test, const struct median_test,
116 const struct n_sample_test *nst = UP_CAST (test, const struct n_sample_test,
119 const bool n_sample_test = (value_compare_3way (&nst->val2, &nst->val1,
120 var_get_width (nst->indep_var)) > 0);
122 struct results *results = xcalloc (nst->n_vars, sizeof (*results));
124 for (v = 0; v < nst->n_vars; ++v)
128 double median = mt->median;
129 const struct variable *var = nst->vars[v];
131 struct hmap map = HMAP_INITIALIZER (map);
132 struct casereader *r = casereader_clone (input);
136 if (n_sample_test == false)
138 struct val_node *vn = xzalloc (sizeof *vn);
139 value_clone (&vn->val, &nst->val1, var_get_width (nst->indep_var));
140 hmap_insert (&map, &vn->node, value_hash (&nst->val1,
141 var_get_width (nst->indep_var), 0));
143 vn = xzalloc (sizeof *vn);
144 value_clone (&vn->val, &nst->val2, var_get_width (nst->indep_var));
145 hmap_insert (&map, &vn->node, value_hash (&nst->val2,
146 var_get_width (nst->indep_var), 0));
149 if ( median == SYSMIS)
151 struct percentile *ptl;
152 struct order_stats *os;
154 struct casereader *rr;
156 struct casewriter *writer;
157 subcase_init_var (&sc, var, SC_ASCEND);
158 rr = casereader_clone (r);
159 writer = sort_create_writer (&sc, casereader_get_proto (rr));
161 for (; (c = casereader_read (rr)) != NULL; )
163 if ( var_is_value_missing (var, case_data (c, var), exclude))
169 cc += dict_get_case_weight (dict, c, &warn);
170 casewriter_write (writer, c);
172 subcase_destroy (&sc);
173 casereader_destroy (rr);
175 rr = casewriter_make_reader (writer);
177 ptl = percentile_create (0.5, cc);
180 order_stats_accumulate (&os, 1,
186 median = percentile_calculate (ptl, PC_HAVERAGE);
187 statistic_destroy (&ptl->parent.parent);
190 results[v].median = median;
193 for (; (c = casereader_read (r)) != NULL; case_unref (c))
195 struct val_node *vn ;
196 const double weight = dict_get_case_weight (dict, c, &warn);
197 const union value *val = case_data (c, var);
198 const union value *indep_val = case_data (c, nst->indep_var);
200 if ( var_is_value_missing (var, case_data (c, var), exclude))
208 int width = var_get_width (nst->indep_var);
209 /* Ignore out of range values */
211 value_compare_3way (indep_val, &nst->val1, width) < 0
213 value_compare_3way (indep_val, &nst->val2, width) > 0
221 vn = find_value (&map, indep_val, nst->indep_var);
224 if ( n_sample_test == true)
226 int width = var_get_width (nst->indep_var);
227 vn = xzalloc (sizeof *vn);
228 value_clone (&vn->val, indep_val, width);
230 hmap_insert (&map, &vn->node, value_hash (indep_val, width, 0));
238 if (val->f <= median)
245 casereader_destroy (r);
249 struct val_node *vn = NULL;
252 HMAP_FOR_EACH (vn, struct val_node, node, &map)
258 results[v].n = count;
259 results[v].sorted_array = xcalloc (hmap_count (&map), sizeof (void*));
260 results[v].var = var;
262 HMAP_FOR_EACH (vn, struct val_node, node, &map)
264 double e_0j = r_0 * (vn->le + vn->gt) / count;
265 double e_1j = r_1 * (vn->le + vn->gt) / count;
267 results[v].chisq += pow2 (vn->le - e_0j) / e_0j;
268 results[v].chisq += pow2 (vn->gt - e_1j) / e_1j;
270 results[v].sorted_array[x++] = vn;
276 sort (results[v].sorted_array, x, sizeof (*results[v].sorted_array),
277 val_node_cmp_3way, nst->indep_var);
282 casereader_destroy (input);
284 show_frequencies (nst, results, n_vals, dict);
285 show_test_statistics (nst, results, n_vals, dict);
287 for (v = 0; v < nst->n_vars; ++v)
290 const struct results *rs = results + v;
292 for (i = 0; i < n_vals; ++i)
294 struct val_node *vn = rs->sorted_array[i];
295 value_destroy (&vn->val, var_get_width (nst->indep_var));
298 free (rs->sorted_array);
306 show_frequencies (const struct n_sample_test *nst, const struct results *results, int n_vals, const struct dictionary *dict)
308 const struct variable *weight = dict_get_weight (dict);
309 const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
314 const int row_headers = 2;
315 const int column_headers = 2;
316 const int nc = row_headers + n_vals;
317 const int nr = column_headers + nst->n_vars * 2;
319 struct tab_table *table = tab_create (nc, nr);
321 tab_headers (table, row_headers, 0, column_headers, 0);
323 tab_title (table, _("Frequencies"));
325 /* Box around the table and vertical lines inside*/
326 tab_box (table, TAL_2, TAL_2, -1, TAL_1,
327 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
329 tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
330 tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
332 tab_joint_text (table,
333 row_headers, 0, row_headers + n_vals - 1, 0,
334 TAT_TITLE | TAB_CENTER, var_to_string (nst->indep_var));
337 tab_hline (table, TAL_1, row_headers, tab_nc (table) - 1, 1);
340 for (i = 0; i < n_vals; ++i)
342 const struct results *rs = results + 0;
344 ds_init_empty (&label);
346 var_append_value_name (nst->indep_var, &rs->sorted_array[i]->val,
349 tab_text (table, row_headers + i, 1,
350 TAT_TITLE | TAB_LEFT, ds_cstr (&label));
355 for (v = 0; v < nst->n_vars; ++v)
357 const struct results *rs = &results[v];
358 tab_text (table, 0, column_headers + v * 2,
359 TAT_TITLE | TAB_LEFT, var_to_string (rs->var) );
361 tab_text (table, 1, column_headers + v * 2,
362 TAT_TITLE | TAB_LEFT, _("> Median") );
364 tab_text (table, 1, column_headers + v * 2 + 1,
365 TAT_TITLE | TAB_LEFT, _("≤ Median") );
368 tab_hline (table, TAL_1, 0, tab_nc (table) - 1, column_headers + v * 2);
371 for (v = 0; v < nst->n_vars; ++v)
374 const struct results *rs = &results[v];
376 for (i = 0; i < n_vals; ++i)
378 const struct val_node *vn = rs->sorted_array[i];
379 tab_double (table, row_headers + i, column_headers + v * 2,
382 tab_double (table, row_headers + i, column_headers + v * 2 + 1,
392 show_test_statistics (const struct n_sample_test *nst,
393 const struct results *results,
395 const struct dictionary *dict)
397 const struct variable *weight = dict_get_weight (dict);
398 const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
402 const int row_headers = 1;
403 const int column_headers = 1;
404 const int nc = row_headers + 5;
405 const int nr = column_headers + nst->n_vars;
407 struct tab_table *table = tab_create (nc, nr);
409 tab_headers (table, row_headers, 0, column_headers, 0);
411 tab_title (table, _("Test Statistics"));
414 tab_box (table, TAL_2, TAL_2, -1, TAL_1,
415 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 );
417 tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers);
418 tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
420 tab_text (table, row_headers + 0, 0,
421 TAT_TITLE | TAB_CENTER, _("N"));
423 tab_text (table, row_headers + 1, 0,
424 TAT_TITLE | TAB_CENTER, _("Median"));
426 tab_text (table, row_headers + 2, 0,
427 TAT_TITLE | TAB_CENTER, _("Chi-Square"));
429 tab_text (table, row_headers + 3, 0,
430 TAT_TITLE | TAB_CENTER, _("df"));
432 tab_text (table, row_headers + 4, 0,
433 TAT_TITLE | TAB_CENTER, _("Asymp. Sig."));
436 for (v = 0; v < nst->n_vars; ++v)
438 double df = n_vals - 1;
439 const struct results *rs = &results[v];
440 tab_text (table, 0, column_headers + v,
441 TAT_TITLE | TAB_LEFT, var_to_string (rs->var));
444 tab_double (table, row_headers + 0, column_headers + v,
447 tab_double (table, row_headers + 1, column_headers + v,
448 0, rs->median, NULL);
450 tab_double (table, row_headers + 2, column_headers + v,
453 tab_double (table, row_headers + 3, column_headers + v,
456 tab_double (table, row_headers + 4, column_headers + v,
457 0, gsl_cdf_chisq_Q (rs->chisq, df), NULL);