1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2009, 2010 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <libpspp/misc.h>
23 #include <libpspp/str.h>
24 #include <libpspp/message.h>
27 #include <data/procedure.h>
28 #include <data/missing-values.h>
29 #include <data/casereader.h>
30 #include <data/casegrouper.h>
31 #include <data/dictionary.h>
32 #include <data/format.h>
34 #include <language/lexer/variable-parser.h>
35 #include <language/command.h>
36 #include <language/lexer/lexer.h>
38 #include <math/moments.h>
39 #include <output/tab.h>
40 #include <output/text-item.h>
43 #define _(msgid) gettext (msgid)
44 #define N_(msgid) msgid
48 const struct variable **items;
51 double sum_of_variances;
52 double variance_of_sums;
53 int totals_idx; /* Casereader index into the totals */
55 struct moments1 **m ; /* Moments of the items */
56 struct moments1 *total ; /* Moments of the totals */
61 dump_cronbach (const struct cronbach *s)
64 printf ("N items %d\n", s->n_items);
65 for (i = 0 ; i < s->n_items; ++i)
67 printf ("%s\n", var_get_name (s->items[i]));
70 printf ("Totals idx %d\n", s->totals_idx);
72 printf ("scale variance %g\n", s->variance_of_sums);
73 printf ("alpha %g\n", s->alpha);
87 SUMMARY_TOTAL = 0x0001,
93 const struct variable **variables;
95 enum mv_class exclude;
102 struct string scale_name;
108 enum summary_opts summary;
110 const struct variable *wv;
114 static bool run_reliability (struct dataset *ds, const struct reliability *reliability);
117 cmd_reliability (struct lexer *lexer, struct dataset *ds)
119 const struct dictionary *dict = dataset_dict (ds);
121 struct reliability reliability;
122 reliability.n_variables = 0;
123 reliability.variables = NULL;
124 reliability.model = MODEL_ALPHA;
125 reliability.exclude = MV_ANY;
126 reliability.summary = 0;
128 reliability.wv = dict_get_weight (dict);
130 reliability.total_start = 0;
132 lex_match (lexer, T_SLASH);
134 if (!lex_force_match_id (lexer, "VARIABLES"))
139 lex_match (lexer, T_EQUALS);
141 if (!parse_variables_const (lexer, dict, &reliability.variables, &reliability.n_variables,
142 PV_NO_DUPLICATE | PV_NUMERIC))
145 if (reliability.n_variables < 2)
146 msg (MW, _("Reliability on a single variable is not useful."));
152 /* Create a default Scale */
154 reliability.n_sc = 1;
155 reliability.sc = xzalloc (sizeof (struct cronbach) * reliability.n_sc);
157 ds_init_cstr (&reliability.scale_name, "ANY");
159 c = &reliability.sc[0];
160 c->n_items = reliability.n_variables;
161 c->items = xzalloc (sizeof (struct variable*) * c->n_items);
163 for (i = 0 ; i < c->n_items ; ++i)
164 c->items[i] = reliability.variables[i];
169 while (lex_token (lexer) != T_ENDCMD)
171 lex_match (lexer, T_SLASH);
173 if (lex_match_id (lexer, "SCALE"))
175 struct const_var_set *vs;
176 if ( ! lex_force_match (lexer, T_LPAREN))
179 if ( ! lex_force_string (lexer) )
182 ds_init_substring (&reliability.scale_name, lex_tokss (lexer));
186 if ( ! lex_force_match (lexer, T_RPAREN))
189 lex_match (lexer, T_EQUALS);
191 vs = const_var_set_create_from_array (reliability.variables, reliability.n_variables);
194 if (!parse_const_var_set_vars (lexer, vs, &reliability.sc->items, &reliability.sc->n_items, 0))
196 const_var_set_destroy (vs);
200 const_var_set_destroy (vs);
202 else if (lex_match_id (lexer, "MODEL"))
204 lex_match (lexer, T_EQUALS);
205 if (lex_match_id (lexer, "ALPHA"))
207 reliability.model = MODEL_ALPHA;
209 else if (lex_match_id (lexer, "SPLIT"))
211 reliability.model = MODEL_SPLIT;
212 reliability.split_point = -1;
214 if ( lex_match (lexer, T_LPAREN))
216 lex_force_num (lexer);
217 reliability.split_point = lex_number (lexer);
219 lex_force_match (lexer, T_RPAREN);
225 else if (lex_match_id (lexer, "SUMMARY"))
227 lex_match (lexer, T_EQUALS);
228 if (lex_match_id (lexer, "TOTAL"))
230 reliability.summary |= SUMMARY_TOTAL;
232 else if (lex_match (lexer, T_ALL))
234 reliability.summary = 0xFFFF;
239 else if (lex_match_id (lexer, "MISSING"))
241 lex_match (lexer, T_EQUALS);
242 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
244 if (lex_match_id (lexer, "INCLUDE"))
246 reliability.exclude = MV_SYSTEM;
248 else if (lex_match_id (lexer, "EXCLUDE"))
250 reliability.exclude = MV_ANY;
254 lex_error (lexer, NULL);
261 lex_error (lexer, NULL);
266 if ( reliability.model == MODEL_SPLIT)
269 const struct cronbach *s;
271 reliability.n_sc += 2 ;
272 reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc);
274 s = &reliability.sc[0];
276 reliability.sc[1].n_items =
277 (reliability.split_point == -1) ? s->n_items / 2 : reliability.split_point;
279 reliability.sc[2].n_items = s->n_items - reliability.sc[1].n_items;
280 reliability.sc[1].items = xzalloc (sizeof (struct variable *)
281 * reliability.sc[1].n_items);
283 reliability.sc[2].items = xzalloc (sizeof (struct variable *) *
284 reliability.sc[2].n_items);
286 for (i = 0; i < reliability.sc[1].n_items ; ++i)
287 reliability.sc[1].items[i] = s->items[i];
289 while (i < s->n_items)
291 reliability.sc[2].items[i - reliability.sc[1].n_items] = s->items[i];
296 if ( reliability.summary & SUMMARY_TOTAL)
299 const int base_sc = reliability.n_sc;
301 reliability.total_start = base_sc;
303 reliability.n_sc += reliability.sc[0].n_items ;
304 reliability.sc = xrealloc (reliability.sc, sizeof (struct cronbach) * reliability.n_sc);
307 for (i = 0 ; i < reliability.sc[0].n_items; ++i )
311 struct cronbach *s = &reliability.sc[i + base_sc];
313 s->n_items = reliability.sc[0].n_items - 1;
314 s->items = xzalloc (sizeof (struct variable *) * s->n_items);
315 for (v_src = 0 ; v_src < reliability.sc[0].n_items ; ++v_src)
318 s->items[v_dest++] = reliability.sc[0].items[v_src];
324 if ( ! run_reliability (ds, &reliability))
327 free (reliability.variables);
331 free (reliability.variables);
337 do_reliability (struct casereader *group, struct dataset *ds,
338 const struct reliability *rel);
341 static void reliability_summary_total (const struct reliability *rel);
343 static void reliability_statistics (const struct reliability *rel);
347 run_reliability (struct dataset *ds, const struct reliability *reliability)
349 struct dictionary *dict = dataset_dict (ds);
351 struct casereader *group;
353 struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
356 while (casegrouper_get_next_group (grouper, &group))
358 do_reliability (group, ds, reliability);
360 reliability_statistics (reliability);
362 if (reliability->summary & SUMMARY_TOTAL )
363 reliability_summary_total (reliability);
366 ok = casegrouper_destroy (grouper);
367 ok = proc_commit (ds) && ok;
376 /* Return the sum of all the item variables in S */
378 append_sum (const struct ccase *c, casenumber n UNUSED, void *aux)
381 const struct cronbach *s = aux;
384 for (v = 0 ; v < s->n_items; ++v)
386 sum += case_data (c, s->items[v])->f;
393 case_processing_summary (casenumber n_valid, casenumber n_missing,
394 const struct dictionary *dict);
398 alpha (int k, double sum_of_variances, double variance_of_sums)
400 return k / ( k - 1.0) * ( 1 - sum_of_variances / variance_of_sums);
404 do_reliability (struct casereader *input, struct dataset *ds,
405 const struct reliability *rel)
410 casenumber n_missing ;
411 casenumber n_valid = 0;
414 for (si = 0 ; si < rel->n_sc; ++si)
416 struct cronbach *s = &rel->sc[si];
418 s->m = xzalloc (sizeof (s->m) * s->n_items);
419 s->total = moments1_create (MOMENT_VARIANCE);
421 for (i = 0 ; i < s->n_items ; ++i )
422 s->m[i] = moments1_create (MOMENT_VARIANCE);
425 input = casereader_create_filter_missing (input,
432 for (si = 0 ; si < rel->n_sc; ++si)
434 struct cronbach *s = &rel->sc[si];
437 s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input));
439 casereader_create_append_numeric (input, append_sum,
443 for (; (c = casereader_read (input)) != NULL; case_unref (c))
448 for (si = 0; si < rel->n_sc; ++si)
450 struct cronbach *s = &rel->sc[si];
452 for (i = 0 ; i < s->n_items ; ++i )
453 moments1_add (s->m[i], case_data (c, s->items[i])->f, weight);
455 moments1_add (s->total, case_data_idx (c, s->totals_idx)->f, weight);
458 casereader_destroy (input);
460 for (si = 0; si < rel->n_sc; ++si)
462 struct cronbach *s = &rel->sc[si];
464 s->sum_of_variances = 0;
465 for (i = 0 ; i < s->n_items ; ++i )
467 double weight, mean, variance;
468 moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL);
470 s->sum_of_variances += variance;
473 moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums,
477 alpha (s->n_items, s->sum_of_variances, s->variance_of_sums);
480 text_item_submit (text_item_create_format (TEXT_ITEM_PARAGRAPH, "Scale: %s",
481 ds_cstr (&rel->scale_name)));
483 case_processing_summary (n_valid, n_missing, dataset_dict (ds));
491 case_processing_summary (casenumber n_valid, casenumber n_missing,
492 const struct dictionary *dict)
494 const struct variable *wv = dict_get_weight (dict);
495 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
500 int heading_columns = 2;
501 int heading_rows = 1;
502 struct tab_table *tbl;
503 tbl = tab_create (n_cols, n_rows);
504 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
506 tab_title (tbl, _("Case Processing Summary"));
508 /* Vertical lines for the data only */
513 n_cols - 1, n_rows - 1);
515 /* Box around table */
520 n_cols - 1, n_rows - 1);
523 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
525 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
528 tab_text (tbl, 0, heading_rows, TAB_LEFT | TAT_TITLE,
531 tab_text (tbl, 1, heading_rows, TAB_LEFT | TAT_TITLE,
534 tab_text (tbl, 1, heading_rows + 1, TAB_LEFT | TAT_TITLE,
537 tab_text (tbl, 1, heading_rows + 2, TAB_LEFT | TAT_TITLE,
540 tab_text (tbl, heading_columns, 0, TAB_CENTER | TAT_TITLE,
543 tab_text (tbl, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE, _("%"));
545 total = n_missing + n_valid;
547 tab_double (tbl, 2, heading_rows, TAB_RIGHT,
551 tab_double (tbl, 2, heading_rows + 1, TAB_RIGHT,
555 tab_double (tbl, 2, heading_rows + 2, TAB_RIGHT,
559 tab_double (tbl, 3, heading_rows, TAB_RIGHT,
560 100 * n_valid / (double) total, NULL);
563 tab_double (tbl, 3, heading_rows + 1, TAB_RIGHT,
564 100 * n_missing / (double) total, NULL);
567 tab_double (tbl, 3, heading_rows + 2, TAB_RIGHT,
568 100 * total / (double) total, NULL);
577 reliability_summary_total (const struct reliability *rel)
580 const int n_cols = 5;
581 const int heading_columns = 1;
582 const int heading_rows = 1;
583 const int n_rows = rel->sc[0].n_items + heading_rows ;
585 struct tab_table *tbl = tab_create (n_cols, n_rows);
586 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
588 tab_title (tbl, _("Item-Total Statistics"));
590 /* Vertical lines for the data only */
595 n_cols - 1, n_rows - 1);
597 /* Box around table */
602 n_cols - 1, n_rows - 1);
605 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
607 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
609 tab_text (tbl, 1, 0, TAB_CENTER | TAT_TITLE,
610 _("Scale Mean if Item Deleted"));
612 tab_text (tbl, 2, 0, TAB_CENTER | TAT_TITLE,
613 _("Scale Variance if Item Deleted"));
615 tab_text (tbl, 3, 0, TAB_CENTER | TAT_TITLE,
616 _("Corrected Item-Total Correlation"));
618 tab_text (tbl, 4, 0, TAB_CENTER | TAT_TITLE,
619 _("Cronbach's Alpha if Item Deleted"));
622 for (i = 0 ; i < rel->sc[0].n_items; ++i)
624 double cov, item_to_total_r;
625 double mean, weight, var;
627 const struct cronbach *s = &rel->sc[rel->total_start + i];
628 tab_text (tbl, 0, heading_rows + i, TAB_LEFT| TAT_TITLE,
629 var_to_string (rel->sc[0].items[i]));
631 moments1_calculate (s->total, &weight, &mean, &var, 0, 0);
633 tab_double (tbl, 1, heading_rows + i, TAB_RIGHT,
636 tab_double (tbl, 2, heading_rows + i, TAB_RIGHT,
637 s->variance_of_sums, NULL);
639 tab_double (tbl, 4, heading_rows + i, TAB_RIGHT,
643 moments1_calculate (rel->sc[0].m[i], &weight, &mean, &var, 0,0);
644 cov = rel->sc[0].variance_of_sums + var - s->variance_of_sums;
647 item_to_total_r = (cov - var) / (sqrt(var) * sqrt (s->variance_of_sums));
650 tab_double (tbl, 3, heading_rows + i, TAB_RIGHT,
651 item_to_total_r, NULL);
659 static void reliability_statistics_model_alpha (struct tab_table *tbl,
660 const struct reliability *rel);
662 static void reliability_statistics_model_split (struct tab_table *tbl,
663 const struct reliability *rel);
666 struct reliability_output_table
672 void (*populate) (struct tab_table *, const struct reliability *);
676 static struct reliability_output_table rol[2] =
678 { 2, 2, 1, 1, reliability_statistics_model_alpha},
679 { 4, 9, 3, 0, reliability_statistics_model_split}
683 reliability_statistics (const struct reliability *rel)
685 int n_cols = rol[rel->model].n_cols;
686 int n_rows = rol[rel->model].n_rows;
687 int heading_columns = rol[rel->model].heading_cols;
688 int heading_rows = rol[rel->model].heading_rows;
690 struct tab_table *tbl = tab_create (n_cols, n_rows);
691 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
693 tab_title (tbl, _("Reliability Statistics"));
695 /* Vertical lines for the data only */
700 n_cols - 1, n_rows - 1);
702 /* Box around table */
707 n_cols - 1, n_rows - 1);
710 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
712 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
714 if ( rel->model == MODEL_ALPHA )
715 reliability_statistics_model_alpha (tbl, rel);
716 else if (rel->model == MODEL_SPLIT )
717 reliability_statistics_model_split (tbl, rel);
724 reliability_statistics_model_alpha (struct tab_table *tbl,
725 const struct reliability *rel)
727 const struct variable *wv = rel->wv;
728 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
730 const struct cronbach *s = &rel->sc[0];
732 tab_text (tbl, 0, 0, TAB_CENTER | TAT_TITLE,
733 _("Cronbach's Alpha"));
735 tab_text (tbl, 1, 0, TAB_CENTER | TAT_TITLE,
738 tab_double (tbl, 0, 1, TAB_RIGHT, s->alpha, NULL);
740 tab_double (tbl, 1, 1, TAB_RIGHT, s->n_items, wfmt);
745 reliability_statistics_model_split (struct tab_table *tbl,
746 const struct reliability *rel)
748 const struct variable *wv = rel->wv;
749 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
751 tab_text (tbl, 0, 0, TAB_LEFT,
752 _("Cronbach's Alpha"));
754 tab_text (tbl, 1, 0, TAB_LEFT,
757 tab_text (tbl, 2, 0, TAB_LEFT,
760 tab_text (tbl, 2, 1, TAB_LEFT,
765 tab_text (tbl, 1, 2, TAB_LEFT,
768 tab_text (tbl, 2, 2, TAB_LEFT,
771 tab_text (tbl, 2, 3, TAB_LEFT,
776 tab_text (tbl, 1, 4, TAB_LEFT,
777 _("Total N of Items"));
779 tab_text (tbl, 0, 5, TAB_LEFT,
780 _("Correlation Between Forms"));
783 tab_text (tbl, 0, 6, TAB_LEFT,
784 _("Spearman-Brown Coefficient"));
786 tab_text (tbl, 1, 6, TAB_LEFT,
789 tab_text (tbl, 1, 7, TAB_LEFT,
790 _("Unequal Length"));
793 tab_text (tbl, 0, 8, TAB_LEFT,
794 _("Guttman Split-Half Coefficient"));
798 tab_double (tbl, 3, 0, TAB_RIGHT, rel->sc[1].alpha, NULL);
799 tab_double (tbl, 3, 2, TAB_RIGHT, rel->sc[2].alpha, NULL);
801 tab_double (tbl, 3, 1, TAB_RIGHT, rel->sc[1].n_items, wfmt);
802 tab_double (tbl, 3, 3, TAB_RIGHT, rel->sc[2].n_items, wfmt);
804 tab_double (tbl, 3, 4, TAB_RIGHT,
805 rel->sc[1].n_items + rel->sc[2].n_items, wfmt);
808 /* R is the correlation between the two parts */
809 double r = rel->sc[0].variance_of_sums -
810 rel->sc[1].variance_of_sums -
811 rel->sc[2].variance_of_sums ;
813 /* Guttman Split Half Coefficient */
814 double g = 2 * r / rel->sc[0].variance_of_sums;
816 /* Unequal Length Spearman Brown Coefficient, and
817 intermediate value used in the computation thereof */
820 r /= sqrt (rel->sc[1].variance_of_sums);
821 r /= sqrt (rel->sc[2].variance_of_sums);
824 tab_double (tbl, 3, 5, TAB_RIGHT, r, NULL);
826 /* Equal length Spearman-Brown Coefficient */
827 tab_double (tbl, 3, 6, TAB_RIGHT, 2 * r / (1.0 + r), NULL);
829 tab_double (tbl, 3, 8, TAB_RIGHT, g, NULL);
831 tmp = (1.0 - r*r) * rel->sc[1].n_items * rel->sc[2].n_items /
832 pow2 (rel->sc[0].n_items);
834 uly = sqrt( pow4 (r) + 4 * pow2 (r) * tmp);
838 tab_double (tbl, 3, 7, TAB_RIGHT, uly, NULL);