1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <gsl/gsl_cdf.h>
23 #include "libpspp/misc.h"
25 #include "libpspp/str.h"
26 #include "data/casereader.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/variable.h"
31 #include "math/moments.h"
32 #include "math/levene.h"
34 #include <output/tab.h>
36 #define _(msgid) gettext (msgid)
41 const struct variable *gvar;
43 const union value *gval0;
44 const union value *gval1;
49 struct moments *mom[2];
55 static void indep_summary (const struct tt *tt, struct indep_samples *is, const struct pair_stats *ps);
56 static void indep_test (const struct tt *tt, const struct pair_stats *ps);
59 which_group (const union value *v, const struct indep_samples *is)
61 int width = var_get_width (is->gvar);
62 int cmp = value_compare_3way (v, is->gval0, width);
69 if (0 == value_compare_3way (v, is->gval1, width))
76 indep_run (struct tt *tt, const struct variable *gvar,
78 const union value *gval0, const union value *gval1,
79 struct casereader *reader)
81 struct indep_samples is;
85 struct pair_stats *ps = xcalloc (sizeof (*ps), tt->n_vars);
89 for (v = 0; v < tt->n_vars; ++v)
91 ps[v].mom[0] = moments_create (MOMENT_VARIANCE);
92 ps[v].mom[1] = moments_create (MOMENT_VARIANCE);
93 ps[v].nl = levene_create (var_get_width (gvar), cut ? gval0: NULL);
101 r = casereader_clone (reader);
102 for ( ; (c = casereader_read (r) ); case_unref (c))
104 double w = dict_get_case_weight (tt->dict, c, NULL);
106 const union value *gv = case_data (c, gvar);
108 int grp = which_group (gv, &is);
112 for (v = 0; v < tt->n_vars; ++v)
114 const union value *val = case_data (c, tt->vars[v]);
115 if (var_is_value_missing (tt->vars[v], val, tt->exclude))
118 moments_pass_one (ps[v].mom[grp], val->f, w);
119 levene_pass_one (ps[v].nl, val->f, w, gv);
122 casereader_destroy (r);
124 r = casereader_clone (reader);
125 for ( ; (c = casereader_read (r) ); case_unref (c))
127 double w = dict_get_case_weight (tt->dict, c, NULL);
129 const union value *gv = case_data (c, gvar);
131 int grp = which_group (gv, &is);
135 for (v = 0; v < tt->n_vars; ++v)
137 const union value *val = case_data (c, tt->vars[v]);
138 if (var_is_value_missing (tt->vars[v], val, tt->exclude))
141 moments_pass_two (ps[v].mom[grp], val->f, w);
142 levene_pass_two (ps[v].nl, val->f, w, gv);
145 casereader_destroy (r);
148 for ( ; (c = casereader_read (r) ); case_unref (c))
150 double w = dict_get_case_weight (tt->dict, c, NULL);
152 const union value *gv = case_data (c, gvar);
154 int grp = which_group (gv, &is);
158 for (v = 0; v < tt->n_vars; ++v)
160 const union value *val = case_data (c, tt->vars[v]);
161 if (var_is_value_missing (tt->vars[v], val, tt->exclude))
164 levene_pass_three (ps[v].nl, val->f, w, gv);
167 casereader_destroy (r);
170 for (v = 0; v < tt->n_vars; ++v)
171 ps[v].lev = levene_calculate (ps[v].nl);
173 indep_summary (tt, &is, ps);
177 for (v = 0; v < tt->n_vars; ++v)
179 moments_destroy (ps[v].mom[0]);
180 moments_destroy (ps[v].mom[1]);
181 levene_destroy (ps[v].nl);
188 indep_summary (const struct tt *tt, struct indep_samples *is, const struct pair_stats *ps)
190 const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
194 const int heading_rows = 1;
195 int rows = tt->n_vars * 2 + heading_rows;
197 struct string vallab0 ;
198 struct string vallab1 ;
199 struct tab_table *t = tab_create (cols, rows);
201 ds_init_empty (&vallab0);
202 ds_init_empty (&vallab1);
204 tab_headers (t, 0, 0, 1, 0);
205 tab_box (t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
206 tab_hline (t, TAL_2, 0, cols - 1, 1);
208 tab_vline (t, TAL_GAP, 1, 0, rows - 1);
209 tab_title (t, _("Group Statistics"));
210 tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, var_to_string (is->gvar));
211 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
212 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
213 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
214 tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
218 /* ds_put_cstr (&vallab0, "≥"); */
219 ds_put_cstr (&vallab0, ">=");
220 ds_put_cstr (&vallab1, "<");
222 var_append_value_name (is->gvar, is->gval0, &vallab0);
223 var_append_value_name (is->gvar, is->gval0, &vallab1);
227 var_append_value_name (is->gvar, is->gval0, &vallab0);
228 var_append_value_name (is->gvar, is->gval1, &vallab1);
231 for (v = 0; v < tt->n_vars; ++v)
234 const struct variable *var = tt->vars[v];
236 tab_text (t, 0, v * 2 + heading_rows, TAB_LEFT,
237 var_to_string (var));
239 tab_text (t, 1, v * 2 + heading_rows, TAB_LEFT,
242 tab_text (t, 1, v * 2 + 1 + heading_rows, TAB_LEFT,
245 for (i = 0 ; i < 2; ++i)
247 double cc, mean, sigma;
248 moments_calculate (ps[v].mom[i], &cc, &mean, &sigma, NULL, NULL);
250 tab_double (t, 2, v * 2 + i + heading_rows, TAB_RIGHT, cc, wfmt);
251 tab_double (t, 3, v * 2 + i + heading_rows, TAB_RIGHT, mean, NULL);
252 tab_double (t, 4, v * 2 + i + heading_rows, TAB_RIGHT, sqrt (sigma), NULL);
253 tab_double (t, 5, v * 2 + i + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL);
259 ds_destroy (&vallab0);
260 ds_destroy (&vallab1);
265 indep_test (const struct tt *tt, const struct pair_stats *ps)
268 const int heading_rows = 3;
269 const int rows= tt->n_vars * 2 + heading_rows;
271 const size_t cols = 11;
273 struct tab_table *t = tab_create (cols, rows);
274 tab_headers (t, 0, 0, 3, 0);
275 tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
276 tab_hline (t, TAL_2, 0, cols - 1, 3);
278 tab_title (t, _("Independent Samples Test"));
280 tab_hline (t, TAL_1, 2, cols - 1, 1);
281 tab_vline (t, TAL_2, 2, 0, rows - 1);
282 tab_vline (t, TAL_1, 4, 0, rows - 1);
283 tab_box (t, -1, -1, -1, TAL_1, 2, 1, cols - 2, rows - 1);
284 tab_hline (t, TAL_1, cols - 2, cols - 1, 2);
285 tab_box (t, -1, -1, -1, TAL_1, cols - 2, 2, cols - 1, rows - 1);
286 tab_joint_text (t, 2, 0, 3, 0, TAB_CENTER, _("Levene's Test for Equality of Variances"));
287 tab_joint_text (t, 4, 0, cols - 1, 0, TAB_CENTER, _("t-test for Equality of Means"));
289 tab_text (t, 2, 2, TAB_CENTER | TAT_TITLE, _("F"));
290 tab_text (t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig."));
291 tab_text (t, 4, 2, TAB_CENTER | TAT_TITLE, _("t"));
292 tab_text (t, 5, 2, TAB_CENTER | TAT_TITLE, _("df"));
293 tab_text (t, 6, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
294 tab_text (t, 7, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference"));
295 tab_text (t, 8, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Difference"));
296 tab_text (t, 9, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
297 tab_text (t, 10, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
299 tab_joint_text_format (t, 9, 1, 10, 1, TAB_CENTER,
300 _("%g%% Confidence Interval of the Difference"),
301 tt->confidence * 100.0);
303 for (v = 0; v < tt->n_vars; ++v)
305 double df, pooled_variance, mean_diff, tval;
306 double se2, std_err_diff;
308 double cc0, mean0, sigma0;
309 double cc1, mean1, sigma1;
310 moments_calculate (ps[v].mom[0], &cc0, &mean0, &sigma0, NULL, NULL);
311 moments_calculate (ps[v].mom[1], &cc1, &mean1, &sigma1, NULL, NULL);
313 tab_text (t, 0, v * 2 + heading_rows, TAB_LEFT, var_to_string (tt->vars[v]));
314 tab_text (t, 1, v * 2 + heading_rows, TAB_LEFT, _("Equal variances assumed"));
316 df = cc0 + cc1 - 2.0;
317 tab_double (t, 5, v * 2 + heading_rows, TAB_RIGHT, df, NULL);
319 pooled_variance = ((cc0 - 1)* sigma0 + (cc1 - 1) * sigma1) / df ;
321 tval = (mean0 - mean1) / sqrt (pooled_variance);
322 tval /= sqrt ((cc0 + cc1) / (cc0 * cc1));
324 tab_double (t, 4, v * 2 + heading_rows, TAB_RIGHT, tval, NULL);
326 p = gsl_cdf_tdist_P (tval, df);
327 q = gsl_cdf_tdist_Q (tval, df);
329 mean_diff = mean0 - mean1;
331 tab_double (t, 6, v * 2 + heading_rows, TAB_RIGHT, 2.0 * (tval > 0 ? q : p), NULL);
332 tab_double (t, 7, v * 2 + heading_rows, TAB_RIGHT, mean_diff, NULL);
334 std_err_diff = sqrt ((sigma0 / cc0) + (sigma1 / cc1));
335 tab_double (t, 8, v * 2 + heading_rows, TAB_RIGHT, std_err_diff, NULL);
338 /* Now work out the confidence interval */
339 q = (1 - tt->confidence)/2.0; /* 2-tailed test */
341 tval = gsl_cdf_tdist_Qinv (q, df);
342 tab_double (t, 9, v * 2 + heading_rows, TAB_RIGHT, mean_diff - tval * std_err_diff, NULL);
343 tab_double (t, 10, v * 2 + heading_rows, TAB_RIGHT, mean_diff + tval * std_err_diff, NULL);
345 /* Equal variances not assumed */
346 tab_text (t, 1, v * 2 + heading_rows + 1, TAB_LEFT, _("Equal variances not assumed"));
348 se2 = sigma0 / cc0 + sigma1 / cc1;
349 tval = mean_diff / sqrt (se2);
350 tab_double (t, 4, v * 2 + heading_rows + 1, TAB_RIGHT, tval, NULL);
354 const double s0 = sigma0 / (cc0);
355 const double s1 = sigma1 / (cc1);
356 double df = pow2 (s0 + s1) ;
357 df /= pow2 (s0) / (cc0 - 1) + pow2 (s1) / (cc1 - 1);
359 tab_double (t, 5, v * 2 + heading_rows + 1, TAB_RIGHT, df, NULL);
361 p = gsl_cdf_tdist_P (tval, df);
362 q = gsl_cdf_tdist_Q (tval, df);
364 tab_double (t, 6, v * 2 + heading_rows + 1, TAB_RIGHT, 2.0 * (tval > 0 ? q : p), NULL);
366 /* Now work out the confidence interval */
367 q = (1 - tt->confidence) / 2.0; /* 2-tailed test */
369 tval = gsl_cdf_tdist_Qinv (q, df);
372 tab_double (t, 7, v * 2 + heading_rows + 1, TAB_RIGHT, mean_diff, NULL);
373 tab_double (t, 8, v * 2 + heading_rows + 1, TAB_RIGHT, std_err_diff, NULL);
374 tab_double (t, 9, v * 2 + heading_rows + 1, TAB_RIGHT, mean_diff - tval * std_err_diff, NULL);
375 tab_double (t, 10, v * 2 + heading_rows + 1, TAB_RIGHT, mean_diff + tval * std_err_diff, NULL);
377 tab_double (t, 2, v * 2 + heading_rows, TAB_CENTER, ps[v].lev, NULL);
381 /* Now work out the significance of the Levene test */
383 double df2 = cc0 + cc1 - 2;
384 double q = gsl_cdf_fdist_Q (ps[v].lev, df1, df2);
385 tab_double (t, 3, v * 2 + heading_rows, TAB_CENTER, q, NULL);