1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include <gsl/gsl_cdf.h>
24 #include "math/moments.h"
25 #include "math/correlation.h"
26 #include "data/casereader.h"
27 #include "data/dictionary.h"
28 #include "data/format.h"
29 #include "data/variable.h"
31 #include "output/pivot-table.h"
34 #define N_(msgid) msgid
35 #define _(msgid) gettext (msgid)
42 const struct variable *var0;
45 const struct variable *var1;
47 struct moments *mom_diff;
52 struct pair_stats *ps;
56 static void paired_summary (const struct tt *tt, struct paired_samp *os);
57 static void paired_correlations (const struct tt *tt, struct paired_samp *os);
58 static void paired_test (const struct tt *tt, const struct paired_samp *os);
61 paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *reader)
64 struct paired_samp ps;
67 ps.ps = xcalloc (n_pairs, sizeof *ps.ps);
69 for (size_t i = 0; i < n_pairs; ++i)
72 struct pair_stats *pp = &ps.ps[i];
73 pp->var0 = (*pair)[0];
74 pp->var1 = (*pair)[1];
75 pp->mom0 = moments_create (MOMENT_VARIANCE);
76 pp->mom1 = moments_create (MOMENT_VARIANCE);
77 pp->mom_diff = moments_create (MOMENT_VARIANCE);
80 r = casereader_clone (reader);
81 for (; (c = casereader_read (r)); case_unref (c))
83 double w = dict_get_case_weight (tt->dict, c, NULL);
85 for (int i = 0; i < ps.n_ps; i++)
87 struct pair_stats *pp = &ps.ps[i];
88 const union value *val0 = case_data (c, pp->var0);
89 const union value *val1 = case_data (c, pp->var1);
90 if (var_is_value_missing (pp->var0, val0, tt->exclude))
93 if (var_is_value_missing (pp->var1, val1, tt->exclude))
96 moments_pass_one (pp->mom0, val0->f, w);
97 moments_pass_one (pp->mom1, val1->f, w);
98 moments_pass_one (pp->mom_diff, val0->f - val1->f, w);
101 casereader_destroy (r);
104 for (; (c = casereader_read (r)); case_unref (c))
106 double w = dict_get_case_weight (tt->dict, c, NULL);
108 for (int i = 0; i < ps.n_ps; i++)
110 struct pair_stats *pp = &ps.ps[i];
111 const union value *val0 = case_data (c, pp->var0);
112 const union value *val1 = case_data (c, pp->var1);
113 if (var_is_value_missing (pp->var0, val0, tt->exclude))
116 if (var_is_value_missing (pp->var1, val1, tt->exclude))
119 moments_pass_two (pp->mom0, val0->f, w);
120 moments_pass_two (pp->mom1, val1->f, w);
121 moments_pass_two (pp->mom_diff, val0->f - val1->f, w);
122 pp->sum_of_prod += val0->f * val1->f * w;
125 casereader_destroy (r);
127 paired_summary (tt, &ps);
128 paired_correlations (tt, &ps);
129 paired_test (tt, &ps);
133 for (int i = 0; i < ps.n_ps; i++)
135 struct pair_stats *pp = &ps.ps[i];
136 moments_destroy (pp->mom0);
137 moments_destroy (pp->mom1);
138 moments_destroy (pp->mom_diff);
144 paired_summary (const struct tt *tt, struct paired_samp *os)
146 struct pivot_table *table = pivot_table_create (
147 N_("Paired Sample Statistics"));
148 pivot_table_set_weight_var (table, tt->wv);
150 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
151 N_("N"), PIVOT_RC_COUNT,
152 N_("Mean"), PIVOT_RC_OTHER,
153 N_("Std. Deviation"), PIVOT_RC_OTHER,
154 N_("S.E. Mean"), PIVOT_RC_OTHER);
156 struct pivot_dimension *variables = pivot_dimension_create (
157 table, PIVOT_AXIS_ROW, N_("Variables"));
159 for (size_t i = 0; i < os->n_ps; i++)
161 struct pair_stats *pp = &os->ps[i];
162 struct pivot_category *pair = pivot_category_create_group__ (
163 variables->root, pivot_value_new_text_format (N_("Pair %zu"), i + 1));
165 for (int j = 0; j < 2; j++)
167 const struct variable *var = j ? pp->var1 : pp->var0;
168 const struct moments *mom = j ? pp->mom1 : pp->mom0;
169 double cc, mean, sigma;
170 moments_calculate (mom, &cc, &mean, &sigma, NULL, NULL);
172 int var_idx = pivot_category_create_leaf (
173 pair, pivot_value_new_variable (var));
175 double entries[] = { cc, mean, sqrt (sigma), sqrt (sigma / cc) };
176 for (size_t j = 0; j < sizeof entries / sizeof *entries; j++)
177 pivot_table_put2 (table, j, var_idx,
178 pivot_value_new_number (entries[j]));
182 pivot_table_submit (table);
187 paired_correlations (const struct tt *tt, struct paired_samp *os)
189 struct pivot_table *table = pivot_table_create (
190 N_("Paired Samples Correlations"));
191 pivot_table_set_weight_var (table, tt->wv);
193 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
194 N_("N"), PIVOT_RC_COUNT,
195 N_("Correlation"), PIVOT_RC_CORRELATION,
196 N_("Sig."), PIVOT_RC_SIGNIFICANCE);
198 struct pivot_dimension *pairs = pivot_dimension_create (
199 table, PIVOT_AXIS_ROW, N_("Pairs"));
201 for (size_t i = 0; i < os->n_ps; i++)
203 struct pair_stats *pp = &os->ps[i];
204 struct pivot_category *group = pivot_category_create_group__ (
205 pairs->root, pivot_value_new_text_format (N_("Pair %zu"), i + 1));
207 int row = pivot_category_create_leaf (
208 group, pivot_value_new_text_format (N_("%s & %s"),
209 var_to_string (pp->var0),
210 var_to_string (pp->var1)));
212 double cc0, mean0, sigma0;
213 double cc1, mean1, sigma1;
214 moments_calculate (pp->mom0, &cc0, &mean0, &sigma0, NULL, NULL);
215 moments_calculate (pp->mom1, &cc1, &mean1, &sigma1, NULL, NULL);
216 /* If this fails, then we're not dealing with missing values properly */
219 double corr = ((pp->sum_of_prod / cc0 - mean0 * mean1)
220 / sqrt (sigma0 * sigma1) * cc0 / (cc0 - 1));
221 double sig = 2.0 * significance_of_correlation (corr, cc0);
222 double entries[] = { cc0, corr, sig };
223 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
224 pivot_table_put2 (table, i, row, pivot_value_new_number (entries[i]));
227 pivot_table_submit (table);
232 paired_test (const struct tt *tt, const struct paired_samp *os)
234 struct pivot_table *table = pivot_table_create (N_("Paired Samples Test"));
235 pivot_table_set_weight_var (table, tt->wv);
237 struct pivot_dimension *statistics = pivot_dimension_create (
238 table, PIVOT_AXIS_COLUMN, N_("Statistics"));
239 struct pivot_category *group = pivot_category_create_group (
240 statistics->root, N_("Paired Differences"),
241 N_("Mean"), PIVOT_RC_OTHER,
242 N_("Std. Deviation"), PIVOT_RC_OTHER,
243 N_("S.E. Mean"), PIVOT_RC_OTHER);
244 struct pivot_category *interval = pivot_category_create_group__ (
245 group, pivot_value_new_text_format (
246 N_("%g%% Confidence Interval of the Difference"),
247 tt->confidence * 100.0));
248 pivot_category_create_leaves (interval,
249 N_("Lower"), PIVOT_RC_OTHER,
250 N_("Upper"), PIVOT_RC_OTHER);
251 pivot_category_create_leaves (statistics->root,
252 N_("t"), PIVOT_RC_OTHER,
253 N_("df"), PIVOT_RC_COUNT,
254 N_("Sig. (2-tailed)"), PIVOT_RC_SIGNIFICANCE);
256 struct pivot_dimension *pairs = pivot_dimension_create (
257 table, PIVOT_AXIS_ROW, N_("Pairs"));
259 for (size_t i = 0; i < os->n_ps; i++)
261 struct pair_stats *pp = &os->ps[i];
262 struct pivot_category *group = pivot_category_create_group__ (
263 pairs->root, pivot_value_new_text_format (N_("Pair %zu"), i + 1));
265 int row = pivot_category_create_leaf (
266 group, pivot_value_new_text_format (N_("%s - %s"),
267 var_to_string (pp->var0),
268 var_to_string (pp->var1)));
270 double cc, mean, sigma;
271 moments_calculate (pp->mom_diff, &cc, &mean, &sigma, NULL, NULL);
273 double df = cc - 1.0;
275 double t = mean * sqrt (cc / sigma);
276 double se_mean = sqrt (sigma / cc);
278 double p = gsl_cdf_tdist_P (t, df);
279 double q = gsl_cdf_tdist_Q (t, df);
280 double sig = 2.0 * (t > 0 ? q : p);
282 double t_qinv = gsl_cdf_tdist_Qinv ((1.0 - tt->confidence) / 2.0, df);
288 mean - t_qinv * se_mean,
289 mean + t_qinv * se_mean,
294 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
295 pivot_table_put2 (table, i, row, pivot_value_new_number (entries[i]));
299 pivot_table_submit (table);