1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <gsl/gsl_cdf.h>
23 #include "libpspp/misc.h"
25 #include "libpspp/str.h"
26 #include "data/casereader.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/variable.h"
30 #include "math/moments.h"
31 #include "math/levene.h"
32 #include "output/pivot-table.h"
35 #define N_(msgid) msgid
36 #define _(msgid) gettext (msgid)
41 const struct variable *gvar;
43 const union value *gval0;
44 const union value *gval1;
49 struct moments *mom[2];
55 static void indep_summary (const struct tt *tt, struct indep_samples *is, const struct pair_stats *ps);
56 static void indep_test (const struct tt *tt, const struct pair_stats *ps);
59 which_group (const union value *v, const struct indep_samples *is)
61 int width = var_get_width (is->gvar);
62 int cmp = value_compare_3way (v, is->gval0, width);
69 if (0 == value_compare_3way (v, is->gval1, width))
76 indep_run (struct tt *tt, const struct variable *gvar,
78 const union value *gval0, const union value *gval1,
79 struct casereader *reader)
81 struct indep_samples is;
85 struct pair_stats *ps = XCALLOC (tt->n_vars, struct pair_stats);
89 for (v = 0; v < tt->n_vars; ++v)
91 ps[v].mom[0] = moments_create (MOMENT_VARIANCE);
92 ps[v].mom[1] = moments_create (MOMENT_VARIANCE);
93 ps[v].nl = levene_create (var_get_width (gvar), cut ? gval0: NULL);
101 r = casereader_clone (reader);
102 for (; (c = casereader_read (r)); case_unref (c))
104 double w = dict_get_case_weight (tt->dict, c, NULL);
106 const union value *gv = case_data (c, gvar);
108 int grp = which_group (gv, &is);
112 for (v = 0; v < tt->n_vars; ++v)
114 const union value *val = case_data (c, tt->vars[v]);
115 if (var_is_value_missing (tt->vars[v], val) & tt->exclude)
118 moments_pass_one (ps[v].mom[grp], val->f, w);
119 levene_pass_one (ps[v].nl, val->f, w, gv);
122 casereader_destroy (r);
124 r = casereader_clone (reader);
125 for (; (c = casereader_read (r)); case_unref (c))
127 double w = dict_get_case_weight (tt->dict, c, NULL);
129 const union value *gv = case_data (c, gvar);
131 int grp = which_group (gv, &is);
135 for (v = 0; v < tt->n_vars; ++v)
137 const union value *val = case_data (c, tt->vars[v]);
138 if (var_is_value_missing (tt->vars[v], val) & tt->exclude)
141 moments_pass_two (ps[v].mom[grp], val->f, w);
142 levene_pass_two (ps[v].nl, val->f, w, gv);
145 casereader_destroy (r);
148 for (; (c = casereader_read (r)); case_unref (c))
150 double w = dict_get_case_weight (tt->dict, c, NULL);
152 const union value *gv = case_data (c, gvar);
154 int grp = which_group (gv, &is);
158 for (v = 0; v < tt->n_vars; ++v)
160 const union value *val = case_data (c, tt->vars[v]);
161 if (var_is_value_missing (tt->vars[v], val) & tt->exclude)
164 levene_pass_three (ps[v].nl, val->f, w, gv);
167 casereader_destroy (r);
170 for (v = 0; v < tt->n_vars; ++v)
171 ps[v].lev = levene_calculate (ps[v].nl);
173 indep_summary (tt, &is, ps);
177 for (v = 0; v < tt->n_vars; ++v)
179 moments_destroy (ps[v].mom[0]);
180 moments_destroy (ps[v].mom[1]);
181 levene_destroy (ps[v].nl);
188 indep_summary (const struct tt *tt, struct indep_samples *is, const struct pair_stats *ps)
190 struct pivot_table *table = pivot_table_create (N_("Group Statistics"));
191 pivot_table_set_weight_var (table, tt->wv);
193 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
194 N_("N"), PIVOT_RC_COUNT,
195 N_("Mean"), PIVOT_RC_OTHER,
196 N_("Std. Deviation"), PIVOT_RC_OTHER,
197 N_("S.E. Mean"), PIVOT_RC_OTHER);
199 struct pivot_dimension *group = pivot_dimension_create (
200 table, PIVOT_AXIS_ROW, N_("Group"));
201 group->root->show_label = true;
204 struct string vallab0 = DS_EMPTY_INITIALIZER;
205 ds_put_cstr (&vallab0, "≥");
206 var_append_value_name (is->gvar, is->gval0, &vallab0);
207 pivot_category_create_leaf (group->root,
208 pivot_value_new_user_text_nocopy (
209 ds_steal_cstr (&vallab0)));
211 struct string vallab1 = DS_EMPTY_INITIALIZER;
212 ds_put_cstr (&vallab1, "<");
213 var_append_value_name (is->gvar, is->gval0, &vallab1);
214 pivot_category_create_leaf (group->root,
215 pivot_value_new_user_text_nocopy (
216 ds_steal_cstr (&vallab1)));
220 pivot_category_create_leaf (
221 group->root, pivot_value_new_var_value (is->gvar, is->gval0));
222 pivot_category_create_leaf (
223 group->root, pivot_value_new_var_value (is->gvar, is->gval1));
226 struct pivot_dimension *dep_vars = pivot_dimension_create (
227 table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
229 for (size_t v = 0; v < tt->n_vars; ++v)
231 const struct variable *var = tt->vars[v];
233 int dep_var_idx = pivot_category_create_leaf (
234 dep_vars->root, pivot_value_new_variable (var));
236 for (int i = 0 ; i < 2; ++i)
238 double cc, mean, sigma;
239 moments_calculate (ps[v].mom[i], &cc, &mean, &sigma, NULL, NULL);
241 double entries[] = { cc, mean, sqrt (sigma), sqrt (sigma / cc) };
242 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
243 pivot_table_put3 (table, j, i, dep_var_idx,
244 pivot_value_new_number (entries[j]));
248 pivot_table_submit (table);
253 indep_test (const struct tt *tt, const struct pair_stats *ps)
255 struct pivot_table *table = pivot_table_create (
256 N_("Independent Samples Test"));
258 struct pivot_dimension *statistics = pivot_dimension_create (
259 table, PIVOT_AXIS_COLUMN, N_("Statistics"));
260 pivot_category_create_group (
261 statistics->root, N_("Levene's Test for Equality of Variances"),
262 N_("F"), PIVOT_RC_OTHER,
263 N_("Sig."), PIVOT_RC_SIGNIFICANCE);
264 struct pivot_category *group = pivot_category_create_group (
265 statistics->root, N_("T-Test for Equality of Means"),
266 N_("t"), PIVOT_RC_OTHER,
267 N_("df"), PIVOT_RC_OTHER,
268 N_("Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE,
269 N_("Mean Difference"), PIVOT_RC_OTHER,
270 N_("Std. Error Difference"), PIVOT_RC_OTHER);
271 pivot_category_create_group (
272 /* xgettext:no-c-format */
273 group, N_("95% Confidence Interval of the Difference"),
274 N_("Lower"), PIVOT_RC_OTHER,
275 N_("Upper"), PIVOT_RC_OTHER);
277 pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Assumptions"),
278 N_("Equal variances assumed"),
279 N_("Equal variances not assumed"));
281 struct pivot_dimension *dep_vars = pivot_dimension_create (
282 table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
284 for (size_t v = 0; v < tt->n_vars; ++v)
286 int dep_var_idx = pivot_category_create_leaf (
287 dep_vars->root, pivot_value_new_variable (tt->vars[v]));
289 double cc0, mean0, sigma0;
290 double cc1, mean1, sigma1;
291 moments_calculate (ps[v].mom[0], &cc0, &mean0, &sigma0, NULL, NULL);
292 moments_calculate (ps[v].mom[1], &cc1, &mean1, &sigma1, NULL, NULL);
294 double mean_diff = mean0 - mean1;
297 /* Equal variances assumed. */
298 double e_df = cc0 + cc1 - 2.0;
299 double e_pooled_variance = ((cc0 - 1)* sigma0 + (cc1 - 1) * sigma1) / e_df;
300 double e_tval = ((mean0 - mean1) / sqrt (e_pooled_variance)
301 / sqrt ((cc0 + cc1) / (cc0 * cc1)));
302 double e_p = gsl_cdf_tdist_P (e_tval, e_df);
303 double e_q = gsl_cdf_tdist_Q (e_tval, e_df);
304 double e_sig = 2.0 * (e_tval > 0 ? e_q : e_p);
305 double e_std_err_diff = sqrt (e_pooled_variance * (1.0/cc0 + 1.0/cc1));
306 double e_tval_qinv = gsl_cdf_tdist_Qinv ((1 - tt->confidence) / 2.0, e_df);
308 /* Equal variances not assumed */
309 const double s0 = sigma0 / cc0;
310 const double s1 = sigma1 / cc1;
311 double d_df = (pow2 (s0 + s1) / (pow2 (s0) / (cc0 - 1)
312 + pow2 (s1) / (cc1 - 1)));
313 double d_tval = mean_diff / sqrt (sigma0 / cc0 + sigma1 / cc1);
314 double d_p = gsl_cdf_tdist_P (d_tval, d_df);
315 double d_q = gsl_cdf_tdist_Q (d_tval, d_df);
316 double d_sig = 2.0 * (d_tval > 0 ? d_q : d_p);
317 double d_std_err_diff = sqrt ((sigma0 / cc0) + (sigma1 / cc1));
318 double d_tval_qinv = gsl_cdf_tdist_Qinv ((1 - tt->confidence) / 2.0, d_df);
329 { 0, 1, gsl_cdf_fdist_Q (ps[v].lev, 1, cc0 + cc1 - 2) },
335 { 0, 6, e_std_err_diff },
336 { 0, 7, mean_diff - e_tval_qinv * e_std_err_diff },
337 { 0, 8, mean_diff + e_tval_qinv * e_std_err_diff },
343 { 1, 6, d_std_err_diff },
344 { 1, 7, mean_diff - d_tval_qinv * d_std_err_diff },
345 { 1, 8, mean_diff + d_tval_qinv * d_std_err_diff },
348 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
350 const struct entry *e = &entries[i];
351 pivot_table_put3 (table, e->stat_idx, e->assumption_idx,
352 dep_var_idx, pivot_value_new_number (e->x));
356 pivot_table_submit (table);