1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011, 2013, 2015, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/casegrouper.h"
22 #include "data/casereader.h"
23 #include "data/dataset.h"
24 #include "data/dictionary.h"
25 #include "data/format.h"
26 #include "data/missing-values.h"
27 #include "language/command.h"
28 #include "language/lexer/lexer.h"
29 #include "language/lexer/variable-parser.h"
30 #include "libpspp/message.h"
31 #include "libpspp/misc.h"
32 #include "libpspp/str.h"
33 #include "math/moments.h"
34 #include "output/pivot-table.h"
35 #include "output/output-item.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) msgid
43 const struct variable **items;
46 double sum_of_variances;
47 double variance_of_sums;
48 int totals_idx; /* Casereader index into the totals */
50 struct moments1 **m; /* Moments of the items */
51 struct moments1 *total; /* Moments of the totals */
56 dump_cronbach (const struct cronbach *s)
59 printf ("N items %d\n", s->n_items);
60 for (i = 0; i < s->n_items; ++i)
62 printf ("%s\n", var_get_name (s->items[i]));
65 printf ("Totals idx %d\n", s->totals_idx);
67 printf ("scale variance %g\n", s->variance_of_sums);
68 printf ("alpha %g\n", s->alpha);
82 const struct variable **vars;
84 enum mv_class exclude;
98 const struct variable *wv;
102 static bool run_reliability (struct dataset *ds, const struct reliability *reliability);
105 reliability_destroy (struct reliability *rel)
108 free (rel->scale_name);
110 for (j = 0; j < rel->n_sc; ++j)
113 free (rel->sc[j].items);
114 moments1_destroy (rel->sc[j].total);
116 for (x = 0; x < rel->sc[j].n_items; ++x)
117 free (rel->sc[j].m[x]);
126 cmd_reliability (struct lexer *lexer, struct dataset *ds)
128 const struct dictionary *dict = dataset_dict (ds);
130 struct reliability r = {
131 .model = MODEL_ALPHA,
133 .wv = dict_get_weight (dict),
134 .scale_name = xstrdup ("ANY"),
137 lex_match (lexer, T_SLASH);
139 if (!lex_force_match_id (lexer, "VARIABLES"))
142 lex_match (lexer, T_EQUALS);
144 int vars_start = lex_ofs (lexer);
145 if (!parse_variables_const (lexer, dict, &r.vars, &r.n_vars,
146 PV_NO_DUPLICATE | PV_NUMERIC))
148 int vars_end = lex_ofs (lexer) - 1;
151 lex_ofs_msg (lexer, SW, vars_start, vars_end,
152 _("Reliability on a single variable is not useful."));
154 /* Create a default scale. */
156 r.sc = xcalloc (r.n_sc, sizeof (struct cronbach));
158 struct cronbach *c = &r.sc[0];
159 c->n_items = r.n_vars;
160 c->items = xcalloc (c->n_items, sizeof (struct variable*));
161 for (size_t i = 0; i < c->n_items; ++i)
162 c->items[i] = r.vars[i];
165 while (lex_token (lexer) != T_ENDCMD)
167 lex_match (lexer, T_SLASH);
169 if (lex_match_id (lexer, "SCALE"))
171 struct const_var_set *vs;
172 if (!lex_force_match (lexer, T_LPAREN))
175 if (!lex_force_string (lexer))
178 r.scale_name = xstrdup (lex_tokcstr (lexer));
181 if (!lex_force_match (lexer, T_RPAREN))
184 lex_match (lexer, T_EQUALS);
186 vs = const_var_set_create_from_array (r.vars, r.n_vars);
189 if (!parse_const_var_set_vars (lexer, vs, &r.sc->items, &r.sc->n_items, 0))
191 const_var_set_destroy (vs);
195 const_var_set_destroy (vs);
197 else if (lex_match_id (lexer, "MODEL"))
199 lex_match (lexer, T_EQUALS);
200 if (lex_match_id (lexer, "ALPHA"))
201 r.model = MODEL_ALPHA;
202 else if (lex_match_id (lexer, "SPLIT"))
204 r.model = MODEL_SPLIT;
207 if (lex_match (lexer, T_LPAREN))
209 if (!lex_force_num (lexer))
211 split_ofs = lex_ofs (lexer);
212 r.split_point = lex_number (lexer);
214 if (!lex_force_match (lexer, T_RPAREN))
220 lex_error_expecting (lexer, "ALPHA", "SPLIT");
224 else if (lex_match_id (lexer, "SUMMARY"))
226 lex_match (lexer, T_EQUALS);
227 if (lex_match_id (lexer, "TOTAL") || lex_match (lexer, T_ALL))
228 r.summary_total = true;
231 lex_error_expecting (lexer, "TOTAL", "ALL");
235 else if (lex_match_id (lexer, "MISSING"))
237 lex_match (lexer, T_EQUALS);
238 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
240 if (lex_match_id (lexer, "INCLUDE"))
241 r.exclude = MV_SYSTEM;
242 else if (lex_match_id (lexer, "EXCLUDE"))
246 lex_error_expecting (lexer, "INCLUDE", "EXCLUDE");
251 else if (lex_match_id (lexer, "STATISTICS"))
253 int statistics_start = lex_ofs (lexer) - 1;
254 lex_match (lexer, T_EQUALS);
255 while (lex_match (lexer, T_ID))
257 int statistics_end = lex_ofs (lexer) - 1;
259 lex_ofs_msg (lexer, SW, statistics_start, statistics_end,
260 _("The STATISTICS subcommand is not yet implemented. "
261 "No statistics will be produced."));
265 lex_error_expecting (lexer, "SCALE", "MODEL", "SUMMARY", "MISSING",
271 if (r.model == MODEL_SPLIT)
273 if (r.split_point >= r.n_vars)
275 lex_ofs_error (lexer, split_ofs, split_ofs,
276 _("The split point must be less than the "
277 "number of variables."));
278 lex_ofs_msg (lexer, SN, vars_start, vars_end,
279 ngettext ("There is %zu variable.",
280 "There are %zu variables.", r.n_vars),
286 r.sc = xrealloc (r.sc, sizeof (struct cronbach) * r.n_sc);
288 const struct cronbach *s = &r.sc[0];
290 r.sc[1].n_items = r.split_point == -1 ? s->n_items / 2 : r.split_point;
292 r.sc[2].n_items = s->n_items - r.sc[1].n_items;
293 r.sc[1].items = XCALLOC (r.sc[1].n_items, const struct variable *);
294 r.sc[2].items = XCALLOC (r.sc[2].n_items, const struct variable *);
297 while (i < r.sc[1].n_items)
299 r.sc[1].items[i] = s->items[i];
302 while (i < s->n_items)
304 r.sc[2].items[i - r.sc[1].n_items] = s->items[i];
311 const int base_sc = r.n_sc;
313 r.total_start = base_sc;
315 r.n_sc += r.sc[0].n_items;
316 r.sc = xrealloc (r.sc, sizeof (struct cronbach) * r.n_sc);
318 for (size_t i = 0; i < r.sc[0].n_items; ++i)
320 struct cronbach *s = &r.sc[i + base_sc];
322 s->n_items = r.sc[0].n_items - 1;
323 s->items = xcalloc (s->n_items, sizeof (struct variable *));
326 for (size_t v_src = 0; v_src < r.sc[0].n_items; ++v_src)
328 s->items[v_dest++] = r.sc[0].items[v_src];
332 if (!run_reliability (ds, &r))
335 reliability_destroy (&r);
339 reliability_destroy (&r);
345 do_reliability (struct casereader *group, struct dataset *ds,
346 const struct reliability *rel);
349 static void reliability_summary_total (const struct reliability *rel);
351 static void reliability_statistics (const struct reliability *rel);
355 run_reliability (struct dataset *ds, const struct reliability *reliability)
357 for (size_t si = 0; si < reliability->n_sc; ++si)
359 struct cronbach *s = &reliability->sc[si];
361 s->m = xcalloc (s->n_items, sizeof *s->m);
362 s->total = moments1_create (MOMENT_VARIANCE);
364 for (size_t i = 0; i < s->n_items; ++i)
365 s->m[i] = moments1_create (MOMENT_VARIANCE);
368 struct dictionary *dict = dataset_dict (ds);
369 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
370 struct casereader *group;
371 while (casegrouper_get_next_group (grouper, &group))
373 do_reliability (group, ds, reliability);
375 reliability_statistics (reliability);
377 if (reliability->summary_total)
378 reliability_summary_total (reliability);
381 bool ok = casegrouper_destroy (grouper);
382 ok = proc_commit (ds) && ok;
386 /* Return the sum of all the item variables in S */
388 append_sum (const struct ccase *c, casenumber n UNUSED, void *aux)
391 const struct cronbach *s = aux;
393 for (int v = 0; v < s->n_items; ++v)
394 sum += case_num (c, s->items[v]);
400 case_processing_summary (casenumber n_valid, casenumber n_missing,
401 const struct dictionary *);
404 alpha (int k, double sum_of_variances, double variance_of_sums)
406 return k / (k - 1.0) * (1 - sum_of_variances / variance_of_sums);
410 do_reliability (struct casereader *input, struct dataset *ds,
411 const struct reliability *rel)
413 for (size_t si = 0; si < rel->n_sc; ++si)
415 struct cronbach *s = &rel->sc[si];
417 moments1_clear (s->total);
418 for (size_t i = 0; i < s->n_items; ++i)
419 moments1_clear (s->m[i]);
422 casenumber n_missing;
423 input = casereader_create_filter_missing (input,
430 for (size_t si = 0; si < rel->n_sc; ++si)
432 struct cronbach *s = &rel->sc[si];
433 s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input));
434 input = casereader_create_append_numeric (input, append_sum, s, NULL);
438 casenumber n_valid = 0;
439 for (; (c = casereader_read (input)) != NULL; case_unref (c))
444 for (size_t si = 0; si < rel->n_sc; ++si)
446 struct cronbach *s = &rel->sc[si];
448 for (size_t i = 0; i < s->n_items; ++i)
449 moments1_add (s->m[i], case_num (c, s->items[i]), weight);
450 moments1_add (s->total, case_num_idx (c, s->totals_idx), weight);
453 casereader_destroy (input);
455 for (size_t si = 0; si < rel->n_sc; ++si)
457 struct cronbach *s = &rel->sc[si];
459 s->sum_of_variances = 0;
460 for (size_t i = 0; i < s->n_items; ++i)
462 double weight, mean, variance;
463 moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL);
465 s->sum_of_variances += variance;
468 moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums,
471 s->alpha = alpha (s->n_items, s->sum_of_variances, s->variance_of_sums);
474 output_item_submit (text_item_create_nocopy (
476 xasprintf (_("Scale: %s"), rel->scale_name),
479 case_processing_summary (n_valid, n_missing, dataset_dict (ds));
483 case_processing_summary (casenumber n_valid, casenumber n_missing,
484 const struct dictionary *dict)
486 struct pivot_table *table = pivot_table_create (
487 N_("Case Processing Summary"));
488 pivot_table_set_weight_var (table, dict_get_weight (dict));
490 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
491 N_("N"), PIVOT_RC_COUNT,
492 N_("Percent"), PIVOT_RC_PERCENT);
494 struct pivot_dimension *cases = pivot_dimension_create (
495 table, PIVOT_AXIS_ROW, N_("Cases"), N_("Valid"), N_("Excluded"),
497 cases->root->show_label = true;
499 casenumber total = n_missing + n_valid;
511 { 1, 0, 100.0 * n_valid / total },
512 { 1, 1, 100.0 * n_missing / total },
516 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
518 const struct entry *e = &entries[i];
519 pivot_table_put2 (table, e->stat_idx, e->case_idx,
520 pivot_value_new_number (e->x));
523 pivot_table_submit (table);
527 reliability_summary_total (const struct reliability *rel)
529 struct pivot_table *table = pivot_table_create (N_("Item-Total Statistics"));
531 pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
532 N_("Scale Mean if Item Deleted"),
533 N_("Scale Variance if Item Deleted"),
534 N_("Corrected Item-Total Correlation"),
535 N_("Cronbach's Alpha if Item Deleted"));
537 struct pivot_dimension *variables = pivot_dimension_create (
538 table, PIVOT_AXIS_ROW, N_("Variables"));
540 for (size_t i = 0; i < rel->sc[0].n_items; ++i)
542 const struct cronbach *s = &rel->sc[rel->total_start + i];
544 int var_idx = pivot_category_create_leaf (
545 variables->root, pivot_value_new_variable (rel->sc[0].items[i]));
548 moments1_calculate (s->total, NULL, &mean, NULL, NULL, NULL);
551 moments1_calculate (rel->sc[0].m[i], NULL, NULL, &var, NULL, NULL);
553 = (rel->sc[0].variance_of_sums + var - s->variance_of_sums) / 2.0;
558 (cov - var) / sqrt (var * s->variance_of_sums),
561 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
562 pivot_table_put2 (table, i, var_idx,
563 pivot_value_new_number (entries[i]));
566 pivot_table_submit (table);
571 reliability_statistics (const struct reliability *rel)
573 struct pivot_table *table = pivot_table_create (
574 N_("Reliability Statistics"));
575 pivot_table_set_weight_var (table, rel->wv);
577 if (rel->model == MODEL_ALPHA)
579 pivot_dimension_create (table, PIVOT_AXIS_COLUMN,
581 N_("Cronbach's Alpha"), PIVOT_RC_OTHER,
582 N_("N of Items"), PIVOT_RC_COUNT);
584 const struct cronbach *s = &rel->sc[0];
585 pivot_table_put1 (table, 0, pivot_value_new_number (s->alpha));
586 pivot_table_put1 (table, 1, pivot_value_new_number (s->n_items));
590 struct pivot_dimension *statistics = pivot_dimension_create (
591 table, PIVOT_AXIS_ROW, N_("Statistics"));
592 struct pivot_category *alpha = pivot_category_create_group (
593 statistics->root, N_("Cronbach's Alpha"));
594 pivot_category_create_group (alpha, N_("Part 1"),
595 N_("Value"), PIVOT_RC_OTHER,
596 N_("N of Items"), PIVOT_RC_COUNT);
597 pivot_category_create_group (alpha, N_("Part 2"),
598 N_("Value"), PIVOT_RC_OTHER,
599 N_("N of Items"), PIVOT_RC_COUNT);
600 pivot_category_create_leaves (alpha,
601 N_("Total N of Items"), PIVOT_RC_COUNT);
602 pivot_category_create_leaves (statistics->root,
603 N_("Correlation Between Forms"),
605 pivot_category_create_group (statistics->root,
606 N_("Spearman-Brown Coefficient"),
607 N_("Equal Length"), PIVOT_RC_OTHER,
608 N_("Unequal Length"), PIVOT_RC_OTHER);
609 pivot_category_create_leaves (statistics->root,
610 N_("Guttman Split-Half Coefficient"),
613 /* R is the correlation between the two parts */
614 double r0 = rel->sc[0].variance_of_sums -
615 rel->sc[1].variance_of_sums -
616 rel->sc[2].variance_of_sums;
617 double r1 = (r0 / sqrt (rel->sc[1].variance_of_sums)
618 / sqrt (rel->sc[2].variance_of_sums)
621 /* Guttman Split Half Coefficient */
622 double g = 2 * r0 / rel->sc[0].variance_of_sums;
624 double tmp = (1.0 - r1*r1) * rel->sc[1].n_items * rel->sc[2].n_items /
625 pow2 (rel->sc[0].n_items);
632 rel->sc[1].n_items + rel->sc[2].n_items,
635 (sqrt (pow4 (r1) + 4 * pow2 (r1) * tmp) - pow2 (r1)) / (2 * tmp),
638 for (size_t i = 0; i < sizeof entries / sizeof *entries; i++)
639 pivot_table_put1 (table, i, pivot_value_new_number (entries[i]));
642 pivot_table_submit (table);