1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include <gsl/gsl_cdf.h>
24 #include "math/moments.h"
25 #include "math/correlation.h"
26 #include "data/casereader.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/variable.h"
30 #include "libpspp/hmapx.h"
31 #include "libpspp/hash-functions.h"
33 #include "output/tab.h"
36 #define _(msgid) gettext (msgid)
44 const struct variable *var0;
47 const struct variable *var1;
49 struct moments *mom_diff;
57 static void paired_summary (const struct tt *tt, struct paired_samp *os);
58 static void paired_correlations (const struct tt *tt, struct paired_samp *os);
59 static void paired_test (const struct tt *tt, const struct paired_samp *os);
62 paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *reader)
66 struct paired_samp ps;
68 struct hmapx_node *node;
69 struct pair_stats *pp = NULL;
71 hmapx_init (&ps.hmap);
73 for (i = 0; i < n_pairs; ++i)
77 struct pair_stats *pp = xzalloc (sizeof *pp);
79 pp->var0 = (*pair)[0];
80 pp->var1 = (*pair)[1];
81 pp->mom0 = moments_create (MOMENT_VARIANCE);
82 pp->mom1 = moments_create (MOMENT_VARIANCE);
83 pp->mom_diff = moments_create (MOMENT_VARIANCE);
85 hash = hash_pointer ((*pair)[0], 0);
86 hash = hash_pointer ((*pair)[1], hash);
88 hmapx_insert (&ps.hmap, pp, hash);
91 r = casereader_clone (reader);
92 for ( ; (c = casereader_read (r) ); case_unref (c))
94 double w = dict_get_case_weight (tt->dict, c, NULL);
96 struct hmapx_node *node;
97 struct pair_stats *pp = NULL;
98 HMAPX_FOR_EACH (pp, node, &ps.hmap)
100 const union value *val0 = case_data (c, pp->var0);
101 const union value *val1 = case_data (c, pp->var1);
102 if (var_is_value_missing (pp->var0, val0, tt->exclude))
105 if (var_is_value_missing (pp->var1, val1, tt->exclude))
108 moments_pass_one (pp->mom0, val0->f, w);
109 moments_pass_one (pp->mom1, val1->f, w);
110 moments_pass_one (pp->mom_diff, val0->f - val1->f, w);
113 casereader_destroy (r);
116 for ( ; (c = casereader_read (r) ); case_unref (c))
118 double w = dict_get_case_weight (tt->dict, c, NULL);
120 struct hmapx_node *node;
121 struct pair_stats *pp = NULL;
122 HMAPX_FOR_EACH (pp, node, &ps.hmap)
124 const union value *val0 = case_data (c, pp->var0);
125 const union value *val1 = case_data (c, pp->var1);
126 if (var_is_value_missing (pp->var0, val0, tt->exclude))
129 if (var_is_value_missing (pp->var1, val1, tt->exclude))
132 moments_pass_two (pp->mom0, val0->f, w);
133 moments_pass_two (pp->mom1, val1->f, w);
134 moments_pass_two (pp->mom_diff, val0->f - val1->f, w);
135 pp->sum_of_prod += val0->f * val1->f * w;
138 casereader_destroy (r);
140 paired_summary (tt, &ps);
141 paired_correlations (tt, &ps);
142 paired_test (tt, &ps);
145 HMAPX_FOR_EACH (pp, node, &ps.hmap)
147 moments_destroy (pp->mom0);
148 moments_destroy (pp->mom1);
149 moments_destroy (pp->mom_diff);
153 hmapx_destroy (&ps.hmap);
157 paired_summary (const struct tt *tt, struct paired_samp *os)
159 size_t n_pairs = hmapx_count (&os->hmap);
160 struct hmapx_node *node;
161 struct pair_stats *pp = NULL;
163 const int heading_rows = 1;
164 const int heading_cols = 2;
166 const int cols = 4 + heading_cols;
167 const int rows = n_pairs * 2 + heading_rows;
168 struct tab_table *t = tab_create (cols, rows);
169 const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
170 tab_set_format (t, RC_WEIGHT, wfmt);
171 tab_headers (t, 0, 0, heading_rows, 0);
172 tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
173 tab_box (t, -1, -1, TAL_0, TAL_1, heading_cols, 0, cols - 1, rows - 1);
175 tab_hline (t, TAL_2, 0, cols - 1, 1);
177 tab_title (t, _("Paired Sample Statistics"));
178 tab_vline (t, TAL_2, heading_cols, 0, rows - 1);
179 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("N"));
180 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
181 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
182 tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
184 HMAPX_FOR_EACH (pp, node, &os->hmap)
187 double cc, mean, sigma;
189 tab_text_format (t, 0, v * 2 + heading_rows, TAB_LEFT, _("Pair %d"), pp->posn + 1);
192 moments_calculate (pp->mom0, &cc, &mean, &sigma, NULL, NULL);
193 tab_text (t, 1, v * 2 + heading_rows, TAB_LEFT, var_to_string (pp->var0));
194 tab_double (t, 3, v * 2 + heading_rows, TAB_RIGHT, cc, NULL, RC_WEIGHT);
195 tab_double (t, 2, v * 2 + heading_rows, TAB_RIGHT, mean, NULL, RC_OTHER);
196 tab_double (t, 4, v * 2 + heading_rows, TAB_RIGHT, sqrt (sigma), NULL, RC_OTHER);
197 tab_double (t, 5, v * 2 + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL, RC_OTHER);
200 moments_calculate (pp->mom1, &cc, &mean, &sigma, NULL, NULL);
201 tab_text (t, 1, v * 2 + 1 + heading_rows, TAB_LEFT, var_to_string (pp->var1));
202 tab_double (t, 3, v * 2 + 1 + heading_rows, TAB_RIGHT, cc, NULL, RC_WEIGHT);
203 tab_double (t, 2, v * 2 + 1 + heading_rows, TAB_RIGHT, mean, NULL, RC_OTHER);
204 tab_double (t, 4, v * 2 + 1 + heading_rows, TAB_RIGHT, sqrt (sigma), NULL, RC_OTHER);
205 tab_double (t, 5, v * 2 + 1 + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL, RC_OTHER);
213 paired_correlations (const struct tt *tt, struct paired_samp *os)
215 size_t n_pairs = hmapx_count (&os->hmap);
216 struct hmapx_node *node;
217 struct pair_stats *pp = NULL;
218 const int heading_rows = 1;
219 const int heading_cols = 2;
222 const int rows = n_pairs + heading_rows;
223 struct tab_table *t = tab_create (cols, rows);
224 const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
225 tab_set_format (t, RC_WEIGHT, wfmt);
226 tab_headers (t, 0, 0, heading_rows, 0);
227 tab_box (t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
229 tab_hline (t, TAL_2, 0, cols - 1, 1);
231 tab_title (t, _("Paired Samples Correlations"));
232 tab_vline (t, TAL_2, heading_cols, 0, rows - 1);
233 tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
234 tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Correlation"));
235 tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Sig."));
237 HMAPX_FOR_EACH (pp, node, &os->hmap)
240 double cc0, mean0, sigma0;
241 double cc1, mean1, sigma1;
244 tab_text_format (t, 0, v + heading_rows, TAB_LEFT, _("Pair %d"), pp->posn + 1);
246 tab_text_format (t, 1, v + heading_rows, TAB_LEFT, _("%s & %s"),
247 var_to_string (pp->var0),
248 var_to_string (pp->var1));
250 moments_calculate (pp->mom0, &cc0, &mean0, &sigma0, NULL, NULL);
251 moments_calculate (pp->mom1, &cc1, &mean1, &sigma1, NULL, NULL);
253 /* If this fails, then we're not dealing with missing values properly */
256 tab_double (t, 2, v + heading_rows, TAB_RIGHT, cc0, NULL, RC_WEIGHT);
258 corr = pp->sum_of_prod / cc0 - (mean0 * mean1);
259 corr /= sqrt (sigma0 * sigma1);
260 corr *= cc0 / (cc0 - 1);
262 tab_double (t, 3, v + heading_rows, TAB_RIGHT, corr, NULL, RC_OTHER);
263 tab_double (t, 4, v + heading_rows, TAB_RIGHT,
264 2.0 * significance_of_correlation (corr, cc0), NULL, RC_PVALUE);
272 paired_test (const struct tt *tt, const struct paired_samp *os)
274 size_t n_pairs = hmapx_count (&os->hmap);
275 struct hmapx_node *node;
276 struct pair_stats *pp = NULL;
278 const int heading_rows = 3;
279 const int heading_cols = 2;
280 const size_t rows = heading_rows + n_pairs;
281 const size_t cols = 10;
282 const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
284 struct tab_table *t = tab_create (cols, rows);
285 tab_set_format (t, RC_WEIGHT, wfmt);
286 tab_headers (t, 0, 0, heading_rows, 0);
287 tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
288 tab_hline (t, TAL_2, 0, cols - 1, 3);
290 tab_title (t, _("Paired Samples Test"));
291 tab_hline (t, TAL_1, heading_cols, 6, 1);
292 tab_vline (t, TAL_2, heading_cols, 0, rows - 1);
294 tab_box (t, -1, -1, -1, TAL_1, heading_cols, 0, cols - 1, rows - 1);
296 tab_joint_text (t, 2, 0, 6, 0, TAB_CENTER,
297 _("Paired Differences"));
299 tab_joint_text_format (t, 5, 1, 6, 1, TAB_CENTER,
300 _("%g%% Confidence Interval of the Difference"),
301 tt->confidence * 100.0);
303 tab_vline (t, TAL_GAP, 6, 1, 1);
304 tab_hline (t, TAL_1, 5, 6, 2);
305 tab_text (t, 7, 2, TAB_CENTER | TAT_TITLE, _("t"));
306 tab_text (t, 8, 2, TAB_CENTER | TAT_TITLE, _("df"));
307 tab_text (t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
308 tab_text (t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean"));
309 tab_text (t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
310 tab_text (t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean"));
312 tab_text (t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
313 tab_text (t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
315 HMAPX_FOR_EACH (pp, node, &os->hmap)
318 double cc, mean, sigma;
324 moments_calculate (pp->mom_diff, &cc, &mean, &sigma, NULL, NULL);
327 tab_text_format (t, 0, v + heading_rows, TAB_LEFT, _("Pair %d"), v + 1);
329 tab_text_format (t, 1, v + heading_rows, TAB_LEFT, _("%s - %s"),
330 var_to_string (pp->var0),
331 var_to_string (pp->var1));
333 tval = mean * sqrt (cc / sigma);
334 se_mean = sqrt (sigma / cc);
336 tab_double (t, 2, v + heading_rows, TAB_RIGHT, mean, NULL, RC_OTHER);
337 tab_double (t, 3, v + heading_rows, TAB_RIGHT, sqrt (sigma), NULL, RC_OTHER);
338 tab_double (t, 4, v + heading_rows, TAB_RIGHT, se_mean, NULL, RC_OTHER);
340 tab_double (t, 7, v + heading_rows, TAB_RIGHT, tval, NULL, RC_OTHER);
341 tab_double (t, 8, v + heading_rows, TAB_RIGHT, df, NULL, RC_WEIGHT);
344 p = gsl_cdf_tdist_P (tval, df);
345 q = gsl_cdf_tdist_Q (tval, df);
347 tab_double (t, 9, v + heading_rows, TAB_RIGHT, 2.0 * (tval > 0 ? q : p), NULL, RC_PVALUE);
349 tval = gsl_cdf_tdist_Qinv ( (1.0 - tt->confidence) / 2.0, df);
351 tab_double (t, 5, v + heading_rows, TAB_RIGHT, mean - tval * se_mean, NULL, RC_OTHER);
352 tab_double (t, 6, v + heading_rows, TAB_RIGHT, mean + tval * se_mean, NULL, RC_OTHER);