1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2008, 2009 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/case.h>
22 #include <data/casegrouper.h>
23 #include <data/casereader.h>
24 #include <data/dictionary.h>
25 #include <data/procedure.h>
26 #include <data/variable.h>
27 #include <language/command.h>
28 #include <libpspp/misc.h>
29 #include <math/moments.h>
30 #include <output/manager.h>
31 #include <output/table.h>
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
44 *^variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
46 missing=miss:!exclude/include;
/* Forward declarations of the custom subcommand parsers.  Each receives an
   opaque AUX pointer; the definitions later in this file convert it to a
   `struct reliability *'. */
55 static int rel_custom_scale (struct lexer *lexer, struct dataset *ds,
56 struct cmd_reliability *p, void *aux);
58 static int rel_custom_model (struct lexer *, struct dataset *,
59 struct cmd_reliability *, void *);
/* Command entry point; declared without `static', so it has external
   linkage. */
61 int cmd_reliability (struct lexer *lexer, struct dataset *ds);
/* Working data for one scale.
   NOTE(review): the enclosing struct header is not visible here; the uses
   further down (`struct cronbach *s' followed by `s->items', `s->m', ...)
   indicate these are members of `struct cronbach' -- confirm. */
/* Variables that make up the scale; allocated with n_items elements by
   the code that builds each scale. */
65 const struct variable **items;
/* Sum over the items of each item's variance (accumulated in
   run_reliability). */
68 double sum_of_variances;
/* Variance of the per-case totals, taken from the `total' moments. */
69 double variance_of_sums;
70 int totals_idx; /* Casereader index into the totals */
72 struct moments1 **m ; /* Moments of the items */
73 struct moments1 *total ; /* Moments of the totals */
/* Debugging aid: writes the contents of scale S to standard output with
   printf -- the item count, the name of every item variable, the totals
   index, the variance of the sums and alpha.
   NOTE(review): `%d' is used for s->n_items; the member's declaration is
   not visible here, so confirm that its type really is int. */
78 dump_cronbach (const struct cronbach *s)
81 printf ("N items %d\n", s->n_items);
/* One line per item variable. */
82 for (i = 0 ; i < s->n_items; ++i)
84 printf ("%s\n", var_get_name (s->items[i]));
87 printf ("Totals idx %d\n", s->totals_idx);
89 printf ("scale variance %g\n", s->variance_of_sums);
90 printf ("alpha %g\n", s->alpha);
/* State shared by the parser callbacks, the data pass and the output
   routines.
   NOTE(review): the enclosing struct header and several members are not
   visible here; the uses below (`rel.dict', `rel.variables', ...) indicate
   this is `struct reliability' -- confirm. */
/* Dictionary of the active dataset; set from dataset_dict (ds) in
   cmd_reliability. */
104 const struct dictionary *dict;
/* Variables named on the command; set from cmd.v_variables. */
105 const struct variable **variables;
/* Missing-value class used when filtering cases: MV_ANY by default,
   MV_SYSTEM when cmd.miss is REL_INCLUDE. */
107 enum mv_class exclude;
/* Name of the scale, printed as "Scale: <name>" by run_reliability. */
114 struct string scale_name;
/* Returns Cronbach's alpha for a scale of K items, given the sum of the
   individual item variances and the variance of the per-case item sums:

       alpha = K / (K - 1) * (1 - SUM_OF_VARIANCES / VARIANCE_OF_SUMS)

   The return type is spelled out as double: the result is stored in a
   double member and printed with %g, so an implicit-int return would
   silently truncate it.

   K - 1.0 forces the division into floating point.  K == 1 or
   VARIANCE_OF_SUMS == 0 divides by zero and therefore yields a
   non-finite value under IEEE arithmetic; no guard is applied here. */
static double
alpha (int k, double sum_of_variances, double variance_of_sums)
{
  return k / (k - 1.0) * (1 - sum_of_variances / variance_of_sums);
}
/* Forward declarations of the output routines and of the per-group data
   pass, all of which cmd_reliability calls before they are defined. */
127 static void reliability_summary_total (const struct reliability *rel);
129 static void reliability_statistics (const struct reliability *rel);
/* Unlike the two output routines, run_reliability takes a non-const REL:
   it stores moments and results into the scales. */
134 run_reliability (struct casereader *group, struct dataset *ds,
135 struct reliability *rel);
/* Runs the RELIABILITY command: parses it with parse_reliability, builds
   the array of scales in REL.sc, then for every split group runs the data
   pass (run_reliability) and emits the output tables.

   Scale layout in REL.sc as built below:
     sc[0]                the full scale;
     sc[1], sc[2]         the two parts, appended only for MODEL_SPLIT;
     sc[total_start ...]  one scale per item of sc[0], each holding
                          sc[0].n_items - 1 items, appended only when
                          cmd.a_summary[REL_SUM_TOTAL] is set.  */
139 cmd_reliability (struct lexer *lexer, struct dataset *ds)
143 struct casegrouper *grouper;
144 struct casereader *group;
145 struct cmd_reliability cmd;
/* Positional initializer: the values must stay in the same order as the
   members of struct reliability. */
147 struct reliability rel = {NULL,
148 NULL, 0, MV_ANY, NULL, 0, -1,
149 DS_EMPTY_INITIALIZER,
152 cmd.v_variables = NULL;
/* REL is passed as the parser's last argument; presumably it is forwarded
   as the AUX pointer of rel_custom_scale / rel_custom_model -- confirm
   against the generated parser. */
154 if ( ! parse_reliability (lexer, ds, &cmd, &rel) )
159 rel.dict = dataset_dict (ds);
160 rel.variables = cmd.v_variables;
161 rel.n_variables = cmd.n_variables;
162 rel.exclude = MV_ANY;
168 /* Create a default Scale */
/* The default scale is named "ANY" and holds every variable parsed into
   CMD, in the same order. */
171 rel.sc = xzalloc (sizeof (struct cronbach) * rel.n_sc);
173 ds_init_cstr (&rel.scale_name, "ANY");
176 c->n_items = cmd.n_variables;
177 c->items = xzalloc (sizeof (struct variable*) * c->n_items);
179 for (i = 0 ; i < c->n_items ; ++i)
180 c->items[i] = cmd.v_variables[i];
/* REL_INCLUDE narrows the exclusion class from MV_ANY to MV_SYSTEM. */
183 if ( cmd.miss == REL_INCLUDE)
184 rel.exclude = MV_SYSTEM;
/* Split model: append two scales that partition the items of sc[0]. */
186 if ( rel.model == MODEL_SPLIT)
189 const struct cronbach *s;
/* NOTE(review): xrealloc is not expected to zero the added elements, so
   the m and total members of the new scales are indeterminate until
   run_reliability assigns them.  Confirm the cleanup loop at the end of
   this function cannot run before that (e.g. when the casegrouper yields
   no group at all). */
192 rel.sc = xrealloc (rel.sc, sizeof (struct cronbach) * rel.n_sc);
/* Size of the first part: split_point, or half of sc[0]'s item count when
   split_point is -1.  The assignment target is not visible here;
   presumably rel.sc[1].n_items.
   NOTE(review): no check is visible that split_point lies within
   0..s->n_items; an out-of-range value would make the copies below run
   past s->items -- confirm it is validated elsewhere. */
197 (rel.split_point == -1) ? s->n_items / 2 : rel.split_point;
/* The second part receives the remaining items. */
199 rel.sc[2].n_items = s->n_items - rel.sc[1].n_items;
200 rel.sc[1].items = xzalloc (sizeof (struct variable *)
201 * rel.sc[1].n_items);
203 rel.sc[2].items = xzalloc (sizeof (struct variable *) *
/* Leading items go to sc[1] ... */
206 for (i = 0; i < rel.sc[1].n_items ; ++i)
207 rel.sc[1].items[i] = s->items[i];
/* ... and the loop continues from the same I, copying the rest to sc[2]. */
209 while (i < s->n_items)
211 rel.sc[2].items[i - rel.sc[1].n_items] = s->items[i];
/* Item-total summary: append one scale per item of sc[0]. */
216 if (cmd.a_summary[REL_SUM_TOTAL])
219 const int base_sc = rel.n_sc;
/* Remember where the per-item scales start; reliability_summary_total
   indexes rel->sc from this position. */
221 rel.total_start = base_sc;
223 rel.n_sc += rel.sc[0].n_items ;
/* NOTE(review): as above, the elements added by xrealloc are not zeroed. */
224 rel.sc = xrealloc (rel.sc, sizeof (struct cronbach) * rel.n_sc);
226 for (i = 0 ; i < rel.sc[0].n_items; ++i )
230 struct cronbach *s = &rel.sc[i + base_sc];
/* Each per-item scale has one item fewer than sc[0]; presumably item I is
   the one left out (the test that skips it is not visible here). */
232 s->n_items = rel.sc[0].n_items - 1;
233 s->items = xzalloc (sizeof (struct variable *) * s->n_items);
234 for (v_src = 0 ; v_src < rel.sc[0].n_items ; ++v_src)
237 s->items[v_dest++] = rel.sc[0].items[v_src];
/* Data pass: one run_reliability call, followed by the output tables, for
   every group produced by the split-file grouper. */
243 grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
244 while (casegrouper_get_next_group (grouper, &group))
246 run_reliability (group, ds, &rel);
248 reliability_statistics (&rel);
250 if (cmd.a_summary[REL_SUM_TOTAL])
251 reliability_summary_total (&rel);
/* OK ends up true only if both the grouper teardown and the procedure
   commit report success. */
253 ok = casegrouper_destroy (grouper);
254 ok = proc_commit (ds) && ok;
256 free_reliability (&cmd);
260 /* Free all the stuff */
261 for (i = 0 ; i < rel.n_sc; ++i)
264 struct cronbach *c = &rel.sc[i];
267 moments1_destroy (c->total);
270 for (x = 0 ; x < c->n_items; ++x)
271 moments1_destroy (c->m[x]);
276 ds_destroy (&rel.scale_name);
285 /* Return the sum of all the item variables in S */
/* Callback handed to casereader_create_append_numeric in run_reliability.
   AUX is the scale (a `struct cronbach') whose items are summed for case
   C; the case number N is unused. */
287 append_sum (const struct ccase *c, casenumber n UNUSED, void *aux)
290 const struct cronbach *s = aux;
/* Accumulate the numeric value (`->f') of every item of S in case C. */
293 for (v = 0 ; v < s->n_items; ++v)
295 sum += case_data (c, s->items[v])->f;
/* Forward declaration: run_reliability calls this with the number of
   valid and missing cases before the definition appears. */
302 static void case_processing_summary (casenumber n_valid, casenumber n_missing,
303 const struct dictionary *dict);
/* Data pass for one group of cases.  For every scale in REL it creates
   moment accumulators, appends a per-case total column to INPUT, feeds
   every case into the accumulators, and finally stores sum_of_variances,
   variance_of_sums and (through alpha ()) the scale's alpha.  It then
   emits a one-cell "Scale: <name>" table and the case processing summary.
   INPUT is consumed: it is destroyed once all cases have been read.

   NOTE(review): the accumulators are allocated on every call and nothing
   visible here releases those of a previous call, so with more than one
   split group the earlier allocations look leaked -- confirm. */
306 run_reliability (struct casereader *input, struct dataset *ds,
307 struct reliability *rel)
312 casenumber n_missing ;
313 casenumber n_valid = 0;
/* Phase 1: create variance-level moment accumulators for each item and
   for the total of each scale. */
316 for (si = 0 ; si < rel->n_sc; ++si)
318 struct cronbach *s = &rel->sc[si];
/* NOTE(review): sizeof (s->m) is the size of the `struct moments1 **'
   pointer itself, not of an element; it normally equals the size of
   `struct moments1 *', but `sizeof *s->m' is what is meant. */
320 s->m = xzalloc (sizeof (s->m) * s->n_items);
321 s->total = moments1_create (MOMENT_VARIANCE);
323 for (i = 0 ; i < s->n_items ; ++i )
324 s->m[i] = moments1_create (MOMENT_VARIANCE);
/* Phase 2: wrap INPUT in a missing-value filter (the arguments are not
   visible here). */
327 input = casereader_create_filter_missing (input,
/* Phase 3: for each scale, record the index the appended total will
   occupy (the current number of widths in the reader's prototype) and
   append a numeric column computed by append_sum. */
334 for (si = 0 ; si < rel->n_sc; ++si)
336 struct cronbach *s = &rel->sc[si];
339 s->totals_idx = caseproto_get_n_widths (casereader_get_proto (input));
341 casereader_create_append_numeric (input, append_sum,
/* Phase 4: read every case, releasing it with case_unref after each
   iteration, and add the item values and the appended total to the
   accumulators. */
345 for (; (c = casereader_read (input)) != NULL; case_unref (c))
350 for (si = 0; si < rel->n_sc; ++si)
352 struct cronbach *s = &rel->sc[si];
354 for (i = 0 ; i < s->n_items ; ++i )
355 moments1_add (s->m[i], case_data (c, s->items[i])->f, weight);
357 moments1_add (s->total, case_data_idx (c, s->totals_idx)->f, weight);
360 casereader_destroy (input);
/* Phase 5: derive the per-scale statistics from the accumulators. */
362 for (si = 0; si < rel->n_sc; ++si)
364 struct cronbach *s = &rel->sc[si];
366 s->sum_of_variances = 0;
367 for (i = 0 ; i < s->n_items ; ++i )
/* Only VARIANCE is used; weight and mean are required output slots. */
369 double weight, mean, variance;
370 moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL);
372 s->sum_of_variances += variance;
375 moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums,
379 alpha (s->n_items, s->sum_of_variances, s->variance_of_sums);
/* Phase 6: a 1x1 untitled table announcing the scale name. */
384 struct tab_table *tab = tab_create(1, 1);
386 tab_dim (tab, tab_natural_dimensions, NULL, NULL);
387 tab_flags (tab, SOMF_NO_TITLE );
389 tab_text_format (tab, 0, 0, 0, "Scale: %s", ds_cstr (&rel->scale_name));
/* N_MISSING is presumably filled in by the missing-value filter, and
   N_VALID counted in the read loop; neither update is visible here. */
395 case_processing_summary (n_valid, n_missing, dataset_dict (ds));
/* Forward declarations of the per-model routines that fill in the
   "Reliability Statistics" table; they are referenced by the `rol' table
   and by reliability_statistics. */
399 static void reliability_statistics_model_alpha (struct tab_table *tbl,
400 const struct reliability *rel);
402 static void reliability_statistics_model_split (struct tab_table *tbl,
403 const struct reliability *rel);
/* Describes the table layout for one model.  reliability_statistics reads
   the members n_cols, n_rows, heading_cols and heading_rows; their
   declarations (and hence their order) are not visible here. */
405 struct reliability_output_table
/* Routine that fills the table for the model.  NOTE(review): no call
   through this pointer is visible in this file; reliability_statistics
   dispatches on rel->model with an if/else instead. */
411 void (*populate) (struct tab_table *, const struct reliability *);
/* Layouts indexed by rel->model.  The entry order suggests MODEL_ALPHA is
   0 and MODEL_SPLIT is 1 -- confirm against the enum. */
414 static struct reliability_output_table rol[2] =
416 { 2, 2, 1, 1, reliability_statistics_model_alpha},
417 { 4, 9, 3, 0, reliability_statistics_model_split}
/* Creates the "Reliability Statistics" table for REL, taking its
   dimensions from the `rol' entry for rel->model, draws its rules, and
   lets the model-specific routine fill in the cells. */
421 reliability_statistics (const struct reliability *rel)
/* Table geometry for the selected model. */
423 int n_cols = rol[rel->model].n_cols;
424 int n_rows = rol[rel->model].n_rows;
425 int heading_columns = rol[rel->model].heading_cols;
426 int heading_rows = rol[rel->model].heading_rows;
428 struct tab_table *tbl = tab_create (n_cols, n_rows);
429 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
431 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
433 tab_title (tbl, _("Reliability Statistics"));
435 /* Vertical lines for the data only */
440 n_cols - 1, n_rows - 1);
442 /* Box around table */
447 n_cols - 1, n_rows - 1);
/* Heavier rules separating the heading rows and heading columns from the
   data cells. */
450 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
452 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
/* Fill in the cells; any model other than ALPHA or SPLIT leaves the
   table body empty. */
454 if ( rel->model == MODEL_ALPHA )
455 reliability_statistics_model_alpha (tbl, rel);
456 else if (rel->model == MODEL_SPLIT )
457 reliability_statistics_model_split (tbl, rel);
/* Emits the "Item-Total Statistics" table: one row per item of the full
   scale rel->sc[0].  Row I reads its figures from the per-item scale
   rel->sc[rel->total_start + I], which cmd_reliability builds with one
   item fewer than sc[0]. */
463 reliability_summary_total (const struct reliability *rel)
466 const int n_cols = 5;
467 const int heading_columns = 1;
468 const int heading_rows = 1;
/* One data row per item, below the heading row. */
469 const int n_rows = rel->sc[0].n_items + heading_rows ;
471 struct tab_table *tbl = tab_create (n_cols, n_rows);
472 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
474 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
476 tab_title (tbl, _("Item-Total Statistics"));
478 /* Vertical lines for the data only */
483 n_cols - 1, n_rows - 1);
485 /* Box around table */
490 n_cols - 1, n_rows - 1);
493 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
495 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
/* Column headings for columns 1 to 4; column 0 holds the item names. */
497 tab_text (tbl, 1, 0, TAB_CENTER | TAT_TITLE,
498 _("Scale Mean if Item Deleted"));
500 tab_text (tbl, 2, 0, TAB_CENTER | TAT_TITLE,
501 _("Scale Variance if Item Deleted"));
503 tab_text (tbl, 3, 0, TAB_CENTER | TAT_TITLE,
504 _("Corrected Item-Total Correlation"));
506 tab_text (tbl, 4, 0, TAB_CENTER | TAT_TITLE,
507 _("Cronbach's Alpha if Item Deleted"));
510 for (i = 0 ; i < rel->sc[0].n_items; ++i)
512 double cov, item_to_total_r;
513 double mean, weight, var;
/* S is the per-item scale that belongs to row I. */
515 const struct cronbach *s = &rel->sc[rel->total_start + i];
516 tab_text (tbl, 0, heading_rows + i, TAB_LEFT| TAT_TITLE,
517 var_to_string (rel->sc[0].items[i]));
/* Moments of S's per-case totals. */
519 moments1_calculate (s->total, &weight, &mean, &var, 0, 0);
521 tab_double (tbl, 1, heading_rows + i, TAB_RIGHT,
524 tab_double (tbl, 2, heading_rows + i, TAB_RIGHT,
525 s->variance_of_sums, NULL);
527 tab_double (tbl, 4, heading_rows + i, TAB_RIGHT,
/* From here on weight, mean and var describe item I itself within the
   full scale, overwriting the values obtained from S above. */
531 moments1_calculate (rel->sc[0].m[i], &weight, &mean, &var, 0,0);
/* Variance of the full-scale totals, plus the item's own variance, minus
   the variance of the totals without the item. */
532 cov = rel->sc[0].variance_of_sums + var - s->variance_of_sums;
/* Scaled by the item's standard deviation and by that of S's totals. */
535 item_to_total_r = (cov - var) / (sqrt(var) * sqrt (s->variance_of_sums));
538 tab_double (tbl, 3, heading_rows + i, TAB_RIGHT,
539 item_to_total_r, NULL);
/* Fills TBL for the alpha model: a heading row, then the alpha value and
   the item count of the full scale rel->sc[0]. */
548 reliability_statistics_model_alpha (struct tab_table *tbl,
549 const struct reliability *rel)
/* Counts use the print format of the dictionary's weight variable, or
   F_8_0 when dict_get_weight returns a null pointer. */
551 const struct variable *wv = dict_get_weight (rel->dict);
552 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
554 const struct cronbach *s = &rel->sc[0];
556 tab_text (tbl, 0, 0, TAB_CENTER | TAT_TITLE,
557 _("Cronbach's Alpha"));
559 tab_text (tbl, 1, 0, TAB_CENTER | TAT_TITLE,
/* Row 1: the values under the two headings. */
562 tab_double (tbl, 0, 1, TAB_RIGHT, s->alpha, NULL);
564 tab_double (tbl, 1, 1, TAB_RIGHT, s->n_items, wfmt);
/* Fills TBL for the split model.  Columns 0 to 2 carry the row labels and
   column 3 the values: alpha and item count of each part (rel->sc[1] and
   rel->sc[2]), the total item count, the correlation between the parts,
   the Spearman-Brown coefficients and the Guttman split-half
   coefficient. */
569 reliability_statistics_model_split (struct tab_table *tbl,
570 const struct reliability *rel)
/* Counts use the print format of the dictionary's weight variable, or
   F_8_0 when dict_get_weight returns a null pointer. */
572 const struct variable *wv = dict_get_weight (rel->dict);
573 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
/* Row labels (some of the label strings are not visible here). */
575 tab_text (tbl, 0, 0, TAB_LEFT,
576 _("Cronbach's Alpha"));
578 tab_text (tbl, 1, 0, TAB_LEFT,
581 tab_text (tbl, 2, 0, TAB_LEFT,
584 tab_text (tbl, 2, 1, TAB_LEFT,
589 tab_text (tbl, 1, 2, TAB_LEFT,
592 tab_text (tbl, 2, 2, TAB_LEFT,
595 tab_text (tbl, 2, 3, TAB_LEFT,
600 tab_text (tbl, 1, 4, TAB_LEFT,
601 _("Total N of Items"));
603 tab_text (tbl, 0, 5, TAB_LEFT,
604 _("Correlation Between Forms"));
607 tab_text (tbl, 0, 6, TAB_LEFT,
608 _("Spearman-Brown Coefficient"));
610 tab_text (tbl, 1, 6, TAB_LEFT,
613 tab_text (tbl, 1, 7, TAB_LEFT,
614 _("Unequal Length"));
617 tab_text (tbl, 0, 8, TAB_LEFT,
618 _("Guttman Split-Half Coefficient"));
/* Values: alpha of each part in rows 0 and 2, item counts in rows 1
   and 3, and the combined item count in row 4. */
622 tab_double (tbl, 3, 0, TAB_RIGHT, rel->sc[1].alpha, NULL);
623 tab_double (tbl, 3, 2, TAB_RIGHT, rel->sc[2].alpha, NULL);
625 tab_double (tbl, 3, 1, TAB_RIGHT, rel->sc[1].n_items, wfmt);
626 tab_double (tbl, 3, 3, TAB_RIGHT, rel->sc[2].n_items, wfmt);
628 tab_double (tbl, 3, 4, TAB_RIGHT,
629 rel->sc[1].n_items + rel->sc[2].n_items, wfmt);
632 /* R is the correlation between the two parts */
/* R starts out as the variance of the full-scale totals minus the
   variances of the two parts' totals; it is normalised further down. */
633 double r = rel->sc[0].variance_of_sums -
634 rel->sc[1].variance_of_sums -
635 rel->sc[2].variance_of_sums ;
637 /* Guttman Split Half Coefficient */
/* Computed from R before R is normalised. */
638 double g = 2 * r / rel->sc[0].variance_of_sums;
640 /* Unequal Length Spearman Brown Coefficient, and
641 intermediate value used in the computation thereof */
/* Divide by the standard deviation of each part's totals. */
644 r /= sqrt (rel->sc[1].variance_of_sums);
645 r /= sqrt (rel->sc[2].variance_of_sums);
648 tab_double (tbl, 3, 5, TAB_RIGHT, r, NULL);
650 /* Equal length Spearman-Brown Coefficient */
651 tab_double (tbl, 3, 6, TAB_RIGHT, 2 * r / (1.0 + r), NULL);
653 tab_double (tbl, 3, 8, TAB_RIGHT, g, NULL);
/* TMP = (1 - r^2) * n1 * n2 / n^2, where n1 and n2 are the item counts of
   the parts and n that of the full scale. */
655 tmp = (1.0 - r*r) * rel->sc[1].n_items * rel->sc[2].n_items /
656 pow2 (rel->sc[0].n_items);
/* Any further adjustment of ULY before it is displayed is not visible
   here. */
658 uly = sqrt( pow4 (r) + 4 * pow2 (r) * tmp);
662 tab_double (tbl, 3, 7, TAB_RIGHT, uly, NULL);
/* Emits the "Case Processing Summary" table: counts in column 2 and
   percentages in column 3, one row each for three categories, the last of
   which is the total N_MISSING + N_VALID.  DICT supplies the weight
   variable whose print format is used for counts.

   NOTE(review): the percentages divide by TOTAL and no guard against
   TOTAL == 0 is visible here -- confirm how an empty group is meant to be
   displayed. */
669 case_processing_summary (casenumber n_valid, casenumber n_missing,
670 const struct dictionary *dict)
/* Print format of the weight variable, or F_8_0 when dict_get_weight
   returns a null pointer. */
672 const struct variable *wv = dict_get_weight (dict);
673 const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
678 int heading_columns = 2;
679 int heading_rows = 1;
680 struct tab_table *tbl;
681 tbl = tab_create (n_cols, n_rows);
682 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
684 tab_dim (tbl, tab_natural_dimensions, NULL, NULL);
686 tab_title (tbl, _("Case Processing Summary"));
688 /* Vertical lines for the data only */
693 n_cols - 1, n_rows - 1);
695 /* Box around table */
700 n_cols - 1, n_rows - 1);
703 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows);
705 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
/* Row and column labels (the label strings are not visible here). */
708 tab_text (tbl, 0, heading_rows, TAB_LEFT | TAT_TITLE,
711 tab_text (tbl, 1, heading_rows, TAB_LEFT | TAT_TITLE,
714 tab_text (tbl, 1, heading_rows + 1, TAB_LEFT | TAT_TITLE,
717 tab_text (tbl, 1, heading_rows + 2, TAB_LEFT | TAT_TITLE,
720 tab_text (tbl, heading_columns, 0, TAB_CENTER | TAT_TITLE,
723 tab_text (tbl, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE, _("%"));
725 total = n_missing + n_valid;
/* Column 2: the counts for the three rows. */
727 tab_double (tbl, 2, heading_rows, TAB_RIGHT,
731 tab_double (tbl, 2, heading_rows + 1, TAB_RIGHT,
735 tab_double (tbl, 2, heading_rows + 2, TAB_RIGHT,
/* Column 3: percentages.  The multiplication by 100 is carried out in
   casenumber arithmetic before the division converts to double. */
739 tab_double (tbl, 3, heading_rows, TAB_RIGHT,
740 100 * n_valid / (double) total, NULL);
743 tab_double (tbl, 3, heading_rows + 1, TAB_RIGHT,
744 100 * n_missing / (double) total, NULL);
747 tab_double (tbl, 3, heading_rows + 2, TAB_RIGHT,
748 100 * total / (double) total, NULL);
/* Custom parser for the model subcommand.  AUX is the `struct reliability'
   being filled in.  Accepts the identifier ALPHA, or SPLIT optionally
   followed by a parenthesised number that becomes the split point;
   without the parenthesised number the split point stays -1, which
   cmd_reliability treats as "half of the items".  DS and CMD are unused.
   The return statements are not visible here. */
755 rel_custom_model (struct lexer *lexer, struct dataset *ds UNUSED,
756 struct cmd_reliability *cmd UNUSED, void *aux)
758 struct reliability *rel = aux;
760 if (lex_match_id (lexer, "ALPHA"))
762 rel->model = MODEL_ALPHA;
764 else if (lex_match_id (lexer, "SPLIT"))
766 rel->model = MODEL_SPLIT;
767 rel->split_point = -1;
768 if ( lex_match (lexer, '('))
/* NOTE(review): the results of lex_force_num and of lex_force_match below
   are discarded, whereas rel_custom_scale checks lex_force_match and
   bails out on failure.  As written, a non-numeric token still reaches
   lex_number -- confirm whether that is intended. */
770 lex_force_num (lexer);
/* The number is stored into an int without a visible range check. */
771 rel->split_point = lex_number (lexer);
773 lex_force_match (lexer, ')');
785 rel_custom_scale (struct lexer *lexer, struct dataset *ds UNUSED,
786 struct cmd_reliability *p, void *aux)
788 struct const_var_set *vs;
789 struct reliability *rel = aux;
790 struct cronbach *scale;
793 rel->sc = xzalloc (sizeof (struct cronbach) * rel->n_sc);
796 if ( ! lex_force_match (lexer, '(')) return 0;
798 if ( ! lex_force_string (lexer) ) return 0;
800 ds_init_string (&rel->scale_name, lex_tokstr (lexer));
804 if ( ! lex_force_match (lexer, ')')) return 0;
806 lex_match (lexer, '=');
808 vs = const_var_set_create_from_array (p->v_variables, p->n_variables);
810 if (!parse_const_var_set_vars (lexer, vs, &scale->items, &scale->n_items, 0))
812 const_var_set_destroy (vs);
816 const_var_set_destroy (vs);