1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010, 2011, 2013, 2015 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include "data/casegrouper.h"
22 #include "data/casereader.h"
23 #include "data/dataset.h"
24 #include "data/dictionary.h"
25 #include "data/format.h"
26 #include "data/missing-values.h"
27 #include "language/command.h"
28 #include "language/lexer/lexer.h"
29 #include "language/lexer/variable-parser.h"
30 #include "libpspp/message.h"
31 #include "libpspp/misc.h"
32 #include "libpspp/str.h"
33 #include "math/moments.h"
34 #include "output/tab.h"
35 #include "output/text-item.h"
38 #define _(msgid) gettext (msgid)
39 #define N_(msgid) msgid
43 const struct variable **items;
46 double sum_of_variances;
47 double variance_of_sums;
48 int totals_idx; /* Casereader index into the totals */
50 struct moments1 **m ; /* Moments of the items */
51 struct moments1 *total ; /* Moments of the totals */
56 dump_cronbach (const struct cronbach *s)
59 printf ("N items %d\n", s->n_items);
60 for (i = 0 ; i < s->n_items; ++i)
62 printf ("%s\n", var_get_name (s->items[i]));
65 printf ("Totals idx %d\n", s->totals_idx);
67 printf ("scale variance %g\n", s->variance_of_sums);
68 printf ("alpha %g\n", s->alpha);
82 SUMMARY_TOTAL = 0x0001,
88 const struct variable **variables;
90 enum mv_class exclude;
97 struct string scale_name;
103 enum summary_opts summary;
105 const struct variable *wv;
109 static bool run_reliability (struct dataset *ds, const struct reliability *reliability);
112 reliability_destroy (struct reliability *rel)
115 ds_destroy (&rel->scale_name);
117 for (j = 0; j < rel->n_sc ; ++j)
120 free (rel->sc[j].items);
121 moments1_destroy (rel->sc[j].total);
123 for (x = 0; x < rel->sc[j].n_items; ++x)
124 free (rel->sc[j].m[x]);
129 free (rel->variables);
133 cmd_reliability (struct lexer *lexer, struct dataset *ds)
135 const struct dictionary *dict = dataset_dict (ds);
137 struct reliability reliability;
138 reliability.n_variables = 0;
139 reliability.variables = NULL;
140 reliability.model = MODEL_ALPHA;
141 reliability.exclude = MV_ANY;
142 reliability.summary = 0;
143 reliability.n_sc = 0;
144 reliability.sc = NULL;
145 reliability.wv = dict_get_weight (dict);
146 reliability.total_start = 0;
147 ds_init_empty (&reliability.scale_name);
150 lex_match (lexer, T_SLASH);
152 if (!lex_force_match_id (lexer, "VARIABLES"))
157 lex_match (lexer, T_EQUALS);
159 if (!parse_variables_const (lexer, dict, &reliability.variables, &reliability.n_variables,
160 PV_NO_DUPLICATE | PV_NUMERIC))
163 if (reliability.n_variables < 2)
164 msg (MW, _("Reliability on a single variable is not useful."));
170 /* Create a default Scale */
172 reliability.n_sc = 1;
173 reliability.sc = xzalloc (sizeof (struct cronbach) * reliability.n_sc);
175 ds_assign_cstr (&reliability.scale_name, "ANY");
177 c = &reliability.sc[0];
178 c->n_items = reliability.n_variables;
179 c->items = xzalloc (sizeof (struct variable*) * c->n_items);
181 for (i = 0 ; i < c->n_items ; ++i)
182 c->items[i] = reliability.variables[i];
187 while (lex_token (lexer) != T_ENDCMD)
189 lex_match (lexer, T_SLASH);
191 if (lex_match_id (lexer, "SCALE"))
193 struct const_var_set *vs;
194 if ( ! lex_force_match (lexer, T_LPAREN))
197 if ( ! lex_force_string (lexer) )
200 ds_assign_substring (&reliability.scale_name, lex_tokss (lexer));
204 if ( ! lex_force_match (lexer, T_RPAREN))
207 lex_match (lexer, T_EQUALS);
209 vs = const_var_set_create_from_array (reliability.variables, reliability.n_variables);
211 free (reliability.sc->items);
212 if (!parse_const_var_set_vars (lexer, vs, &reliability.sc->items, &reliability.sc->n_items, 0))
214 const_var_set_destroy (vs);
218 const_var_set_destroy (vs);
220 else if (lex_match_id (lexer, "MODEL"))
222 lex_match (lexer, T_EQUALS);
223 if (lex_match_id (lexer, "ALPHA"))
225 reliability.model = MODEL_ALPHA;
227 else if (lex_match_id (lexer, "SPLIT"))
229 reliability.model = MODEL_SPLIT;
230 reliability.split_point = -1;
232 if ( lex_match (lexer, T_LPAREN)
233 && lex_force_num (lexer))
235 reliability.split_point = lex_number (lexer);
237 lex_force_match (lexer, T_RPAREN);
243 else if (lex_match_id (lexer, "SUMMARY"))
245 lex_match (lexer, T_EQUALS);
246 if (lex_match_id (lexer, "TOTAL"))
248 reliability.summary |= SUMMARY_TOTAL;
250 else if (lex_match (lexer, T_ALL))
252 reliability.summary = 0xFFFF;
257 else if (lex_match_id (lexer, "MISSING"))
259 lex_match (lexer, T_EQUALS);
260 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
262 if (lex_match_id (lexer, "INCLUDE"))
264 reliability.exclude = MV_SYSTEM;
266 else if (lex_match_id (lexer, "EXCLUDE"))
268 reliability.exclude = MV_ANY;
272 lex_error (lexer, NULL);
277 else if (lex_match_id (lexer, "STATISTICS"))
279 lex_match (lexer, T_EQUALS);
280 msg (SW, _("The STATISTICS subcommand is not yet implemented. "
281 "No statistics will be produced."));
282 while (lex_match (lexer, T_ID))
287 lex_error (lexer, NULL);
292 if ( reliability.model == MODEL_SPLIT)
295 const struct cronbach *s;
297 if ( reliability.split_point >= reliability.n_variables)
299 msg (ME, _("The split point must be less than the number of variables"));
303 reliability.n_sc += 2 ;
304 reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc);
306 s = &reliability.sc[0];
308 reliability.sc[1].n_items =
309 (reliability.split_point == -1) ? s->n_items / 2 : reliability.split_point;
311 reliability.sc[2].n_items = s->n_items - reliability.sc[1].n_items;
312 reliability.sc[1].items = xzalloc (sizeof (struct variable *)
313 * reliability.sc[1].n_items);
315 reliability.sc[2].items = xzalloc (sizeof (struct variable *) *
316 reliability.sc[2].n_items);
318 for (i = 0; i < reliability.sc[1].n_items ; ++i)
319 reliability.sc[1].items[i] = s->items[i];
321 while (i < s->n_items)
323 reliability.sc[2].items[i - reliability.sc[1].n_items] = s->items[i];
328 if ( reliability.summary & SUMMARY_TOTAL)
331 const int base_sc = reliability.n_sc;
333 reliability.total_start = base_sc;
335 reliability.n_sc += reliability.sc[0].n_items ;
336 reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc);
339 for (i = 0 ; i < reliability.sc[0].n_items; ++i )
343 struct cronbach *s = &reliability.sc[i + base_sc];
345 s->n_items = reliability.sc[0].n_items - 1;
346 s->items = xzalloc (sizeof (struct variable *) * s->n_items);
347 for (v_src = 0 ; v_src < reliability.sc[0].n_items ; ++v_src)
350 s->items[v_dest++] = reliability.sc[0].items[v_src];
356 if ( ! run_reliability (ds, &reliability))
359 reliability_destroy (&reliability);
363 reliability_destroy (&reliability);
369 do_reliability (struct casereader *group, struct dataset *ds,
370 const struct reliability *rel);
373 static void reliability_summary_total (const struct reliability *rel);
375 static void reliability_statistics (const struct reliability *rel);
379 run_reliability (struct dataset *ds, const struct reliability *reliability)
381 struct dictionary *dict = dataset_dict (ds);
383 struct casereader *group;
385 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
388 for (si = 0 ; si < reliability->n_sc; ++si)
390 struct cronbach *s = &reliability->sc[si];
393 s->m = xzalloc (sizeof *s->m * s->n_items);
394 s->total = moments1_create (MOMENT_VARIANCE);
396 for (i = 0 ; i < s->n_items ; ++i )
397 s->m[i] = moments1_create (MOMENT_VARIANCE);
401 while (casegrouper_get_next_group (grouper, &group))
403 do_reliability (group, ds, reliability);
405 reliability_statistics (reliability);
407 if (reliability->summary & SUMMARY_TOTAL )
408 reliability_summary_total (reliability);
411 ok = casegrouper_destroy (grouper);
412 ok = proc_commit (ds) && ok;
421 /* Return the sum of all the item variables in S */
423 append_sum (const struct ccase *c, casenumber n UNUSED, void *aux)
426 const struct cronbach *s = aux;
429 for (v = 0 ; v < s->n_items; ++v)
431 sum += case_data (c, s->items[v])->f;
438 case_processing_summary (casenumber n_valid, casenumber n_missing,
439 const struct dictionary *dict);
443 alpha (int k, double sum_of_variances, double variance_of_sums)
445 return k / ( k - 1.0) * ( 1 - sum_of_variances / variance_of_sums);
449 do_reliability (struct casereader *input, struct dataset *ds,
450 const struct reliability *rel)
455 casenumber n_missing ;
456 casenumber n_valid = 0;
459 for (si = 0 ; si < rel->n_sc; ++si)
461 struct cronbach *s = &rel->sc[si];
463 moments1_clear (s->total);
465 for (i = 0 ; i < s->n_items ; ++i )
466 moments1_clear (s->m[i]);
469 input = casereader_create_filter_missing (input,
476 for (si = 0 ; si < rel->n_sc; ++si)
478 struct cronbach *s = &rel->sc[si];
481 s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input));
483 casereader_create_append_numeric (input, append_sum,
487 for (; (c = casereader_read (input)) != NULL; case_unref (c))
492 for (si = 0; si < rel->n_sc; ++si)
494 struct cronbach *s = &rel->sc[si];
496 for (i = 0 ; i < s->n_items ; ++i )
497 moments1_add (s->m[i], case_data (c, s->items[i])->f, weight);
499 moments1_add (s->total, case_data_idx (c, s->totals_idx)->f, weight);
502 casereader_destroy (input);
504 for (si = 0; si < rel->n_sc; ++si)
506 struct cronbach *s = &rel->sc[si];
508 s->sum_of_variances = 0;
509 for (i = 0 ; i < s->n_items ; ++i )
511 double weight, mean, variance;
512 moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL);
514 s->sum_of_variances += variance;
517 moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums,
521 alpha (s->n_items, s->sum_of_variances, s->variance_of_sums);
524 text_item_submit (text_item_create_format (TEXT_ITEM_PARAGRAPH, _("Scale: %s"),
525 ds_cstr (&rel->scale_name)));
527 case_processing_summary (n_valid, n_missing, dataset_dict (ds));
535 case_processing_summary (casenumber n_valid, casenumber n_missing,
536 const struct dictionary *dict)
538 const struct variable *wv = dict_get_weight (dict);
539 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
544 int heading_columns = 2;
545 int heading_rows = 1;
546 struct tab_table *tbl;
547 tbl = tab_create (n_cols, n_rows);
548 tab_set_format (tbl, RC_WEIGHT, wfmt);
549 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
551 tab_title (tbl, _("Case Processing Summary"));
553 /* Vertical lines for the data only */
558 n_cols - 1, n_rows - 1);
560 /* Box around table */
565 n_cols - 1, n_rows - 1);
568 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
570 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
573 tab_text (tbl, 0, heading_rows, TAB_LEFT | TAT_TITLE,
576 tab_text (tbl, 1, heading_rows, TAB_LEFT | TAT_TITLE,
579 tab_text (tbl, 1, heading_rows + 1, TAB_LEFT | TAT_TITLE,
582 tab_text (tbl, 1, heading_rows + 2, TAB_LEFT | TAT_TITLE,
585 tab_text (tbl, heading_columns, 0, TAB_CENTER | TAT_TITLE,
588 tab_text (tbl, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE, _("%"));
590 total = n_missing + n_valid;
592 tab_double (tbl, 2, heading_rows, TAB_RIGHT,
593 n_valid, NULL, RC_WEIGHT);
596 tab_double (tbl, 2, heading_rows + 1, TAB_RIGHT,
597 n_missing, NULL, RC_WEIGHT);
600 tab_double (tbl, 2, heading_rows + 2, TAB_RIGHT,
601 total, NULL, RC_WEIGHT);
604 tab_double (tbl, 3, heading_rows, TAB_RIGHT,
605 100 * n_valid / (double) total, NULL, RC_OTHER);
608 tab_double (tbl, 3, heading_rows + 1, TAB_RIGHT,
609 100 * n_missing / (double) total, NULL, RC_OTHER);
612 tab_double (tbl, 3, heading_rows + 2, TAB_RIGHT,
613 100 * total / (double) total, NULL, RC_OTHER);
622 reliability_summary_total (const struct reliability *rel)
625 const int n_cols = 5;
626 const int heading_columns = 1;
627 const int heading_rows = 1;
628 const int n_rows = rel->sc[0].n_items + heading_rows ;
629 const struct variable *wv = rel->wv;
630 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
631 struct tab_table *tbl = tab_create (n_cols, n_rows);
632 tab_set_format (tbl, RC_WEIGHT, wfmt);
633 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
635 tab_title (tbl, _("Item-Total Statistics"));
637 /* Vertical lines for the data only */
642 n_cols - 1, n_rows - 1);
644 /* Box around table */
649 n_cols - 1, n_rows - 1);
652 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
654 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
656 tab_text (tbl, 1, 0, TAB_CENTER | TAT_TITLE,
657 _("Scale Mean if Item Deleted"));
659 tab_text (tbl, 2, 0, TAB_CENTER | TAT_TITLE,
660 _("Scale Variance if Item Deleted"));
662 tab_text (tbl, 3, 0, TAB_CENTER | TAT_TITLE,
663 _("Corrected Item-Total Correlation"));
665 tab_text (tbl, 4, 0, TAB_CENTER | TAT_TITLE,
666 _("Cronbach's Alpha if Item Deleted"));
669 for (i = 0 ; i < rel->sc[0].n_items; ++i)
671 double cov, item_to_total_r;
672 double mean, weight, var;
674 const struct cronbach *s = &rel->sc[rel->total_start + i];
675 tab_text (tbl, 0, heading_rows + i, TAB_LEFT| TAT_TITLE,
676 var_to_string (rel->sc[0].items[i]));
678 moments1_calculate (s->total, &weight, &mean, &var, 0, 0);
680 tab_double (tbl, 1, heading_rows + i, TAB_RIGHT,
681 mean, NULL, RC_OTHER);
683 tab_double (tbl, 2, heading_rows + i, TAB_RIGHT,
684 s->variance_of_sums, NULL, RC_OTHER);
686 tab_double (tbl, 4, heading_rows + i, TAB_RIGHT,
687 s->alpha, NULL, RC_OTHER);
690 moments1_calculate (rel->sc[0].m[i], &weight, &mean, &var, 0,0);
691 cov = rel->sc[0].variance_of_sums + var - s->variance_of_sums;
694 item_to_total_r = (cov - var) / (sqrt(var) * sqrt (s->variance_of_sums));
697 tab_double (tbl, 3, heading_rows + i, TAB_RIGHT,
698 item_to_total_r, NULL, RC_OTHER);
706 static void reliability_statistics_model_alpha (struct tab_table *tbl,
707 const struct reliability *rel);
709 static void reliability_statistics_model_split (struct tab_table *tbl,
710 const struct reliability *rel);
713 struct reliability_output_table
719 void (*populate) (struct tab_table *, const struct reliability *);
723 static struct reliability_output_table rol[2] =
725 { 2, 2, 1, 1, reliability_statistics_model_alpha},
726 { 4, 9, 3, 0, reliability_statistics_model_split}
730 reliability_statistics (const struct reliability *rel)
732 int n_cols = rol[rel->model].n_cols;
733 int n_rows = rol[rel->model].n_rows;
734 int heading_columns = rol[rel->model].heading_cols;
735 int heading_rows = rol[rel->model].heading_rows;
736 const struct variable *wv = rel->wv;
737 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
738 struct tab_table *tbl = tab_create (n_cols, n_rows);
739 tab_set_format (tbl, RC_WEIGHT, wfmt);
741 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
743 tab_title (tbl, _("Reliability Statistics"));
745 /* Vertical lines for the data only */
750 n_cols - 1, n_rows - 1);
752 /* Box around table */
757 n_cols - 1, n_rows - 1);
760 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
762 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
764 if ( rel->model == MODEL_ALPHA )
765 reliability_statistics_model_alpha (tbl, rel);
766 else if (rel->model == MODEL_SPLIT )
767 reliability_statistics_model_split (tbl, rel);
774 reliability_statistics_model_alpha (struct tab_table *tbl,
775 const struct reliability *rel)
777 const struct cronbach *s = &rel->sc[0];
779 tab_text (tbl, 0, 0, TAB_CENTER | TAT_TITLE,
780 _("Cronbach's Alpha"));
782 tab_text (tbl, 1, 0, TAB_CENTER | TAT_TITLE,
785 tab_double (tbl, 0, 1, TAB_RIGHT, s->alpha, NULL, RC_OTHER);
787 tab_double (tbl, 1, 1, TAB_RIGHT, s->n_items, NULL, RC_WEIGHT);
792 reliability_statistics_model_split (struct tab_table *tbl,
793 const struct reliability *rel)
795 tab_text (tbl, 0, 0, TAB_LEFT,
796 _("Cronbach's Alpha"));
798 tab_text (tbl, 1, 0, TAB_LEFT,
801 tab_text (tbl, 2, 0, TAB_LEFT,
804 tab_text (tbl, 2, 1, TAB_LEFT,
807 tab_text (tbl, 1, 2, TAB_LEFT,
810 tab_text (tbl, 2, 2, TAB_LEFT,
813 tab_text (tbl, 2, 3, TAB_LEFT,
816 tab_text (tbl, 1, 4, TAB_LEFT,
817 _("Total N of Items"));
819 tab_text (tbl, 0, 5, TAB_LEFT,
820 _("Correlation Between Forms"));
822 tab_text (tbl, 0, 6, TAB_LEFT,
823 _("Spearman-Brown Coefficient"));
825 tab_text (tbl, 1, 6, TAB_LEFT,
828 tab_text (tbl, 1, 7, TAB_LEFT,
829 _("Unequal Length"));
832 tab_text (tbl, 0, 8, TAB_LEFT,
833 _("Guttman Split-Half Coefficient"));
837 tab_double (tbl, 3, 0, TAB_RIGHT, rel->sc[1].alpha, NULL, RC_OTHER);
838 tab_double (tbl, 3, 2, TAB_RIGHT, rel->sc[2].alpha, NULL, RC_OTHER);
840 tab_double (tbl, 3, 1, TAB_RIGHT, rel->sc[1].n_items, NULL, RC_WEIGHT);
841 tab_double (tbl, 3, 3, TAB_RIGHT, rel->sc[2].n_items, NULL, RC_WEIGHT);
843 tab_double (tbl, 3, 4, TAB_RIGHT,
844 rel->sc[1].n_items + rel->sc[2].n_items, NULL, RC_WEIGHT);
847 /* R is the correlation between the two parts */
848 double r = rel->sc[0].variance_of_sums -
849 rel->sc[1].variance_of_sums -
850 rel->sc[2].variance_of_sums ;
852 /* Guttman Split Half Coefficient */
853 double g = 2 * r / rel->sc[0].variance_of_sums;
855 /* Unequal Length Spearman Brown Coefficient, and
856 intermediate value used in the computation thereof */
859 r /= sqrt (rel->sc[1].variance_of_sums);
860 r /= sqrt (rel->sc[2].variance_of_sums);
863 tab_double (tbl, 3, 5, TAB_RIGHT, r, NULL, RC_OTHER);
865 /* Equal length Spearman-Brown Coefficient */
866 tab_double (tbl, 3, 6, TAB_RIGHT, 2 * r / (1.0 + r), NULL, RC_OTHER);
868 tab_double (tbl, 3, 8, TAB_RIGHT, g, NULL, RC_OTHER);
870 tmp = (1.0 - r*r) * rel->sc[1].n_items * rel->sc[2].n_items /
871 pow2 (rel->sc[0].n_items);
873 uly = sqrt( pow4 (r) + 4 * pow2 (r) * tmp);
877 tab_double (tbl, 3, 7, TAB_RIGHT, uly, NULL, RC_OTHER);