Implemented the reliability command.

author John Darrington <john@marilyn.intra>

Fri, 12 Sep 2008 23:17:28 +0000 (07:17 +0800)

committer John Darrington <john@marilyn.intra>

Fri, 12 Sep 2008 23:17:28 +0000 (07:17 +0800)
author John Darrington <john@marilyn.intra>
Fri, 12 Sep 2008 23:17:28 +0000 (07:17 +0800)
committer John Darrington <john@marilyn.intra>
Fri, 12 Sep 2008 23:17:28 +0000 (07:17 +0800)
diff --git a/doc/statistics.texi b/doc/statistics.texi

index 7b1d8c5f341549d5b8c22c8a3c58c9b9b956da13..a67402c0e13bd178dc720078f5cf819ee829e160 100644 (file)
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -14,6 +14,7 @@ far.
  * ONEWAY::                      One way analysis of variance.
  * RANK::                        Compute rank scores.
  * REGRESSION::                  Linear regression.
+* RELIABILITY::                 Reliability analysis.
  @end menu
  
  @node DESCRIPTIVES
@@ -837,3 +838,50 @@ user-missing are to be excluded from the rank scores. A setting of
  INCLUDE means they are to be included.  The default is EXCLUDE.
  
  @include regression.texi
+
+
+@node RELIABILITY
+@section RELIABILITY
+
+@vindex RELIABILITY
+@display
+RELIABILITY
+        /VARIABLES=var_list
+        /SCALE (@var{name}) = @{var_list, ALL@}
+        /MODEL=@{ALPHA, SPLIT[(N)]@}
+        /SUMMARY=@{TOTAL,ALL@}
+        /MISSING=@{EXCLUDE,INCLUDE@}
+@end display
+
+@cindex Cronbach's Alpha
+The @cmd{RELIABILITY} command performs reliability analysis on the data.
+
+The VARIABLES subcommand is required. It determines the set of variables 
+upon which analysis is to be performed.
+
+The SCALE subcommand determines the variables for which reliability is to be
+calculated.  If it is omitted, then all the variables named in the VARIABLES
+subcommand are analysed.
+When SCALE is given, the @var{name} parameter (a quoted string) sets the
+name of the scale.
+
+The MODEL subcommand determines the type of analysis.  If ALPHA is specified,
+then Cronbach's Alpha is calculated for the scale.  If the model is SPLIT,
+then the variables are divided into 2 subsets.  An optional parameter
+@var{N} may be given, to specify how many variables are to be in the first
+subset.
+If @var{N} is omitted, then it defaults to one half of the variables in the
+scale, rounded down if there is an odd number of variables.
+The default model is ALPHA.
+
+By default, any case with a user-missing or system-missing value for
+any variable given in the VARIABLES subcommand is omitted from the analysis.
+The MISSING subcommand determines whether user-missing values are to
+be included in or excluded from the analysis.
+
+The SUMMARY subcommand determines the type of summary analysis to be performed.
+Currently there is only one type: SUMMARY=TOTAL, which displays per-item
+analysis tested against the totals.
+
+
+
diff --git a/src/language/command.def b/src/language/command.def

index 77eb3a3a7a7ea2e5e666220d1cefac0d56354bb7..c2cc7f624f6593e46fd19854fd253dfcf119828d 100644 (file)
--- a/src/language/command.def
+++ b/src/language/command.def
@@ -113,6 +113,7 @@ DEF_CMD (S_DATA, 0, "ONEWAY", cmd_oneway)
  DEF_CMD (S_DATA, 0, "PEARSON CORRELATIONS", cmd_correlations)
  DEF_CMD (S_DATA, 0, "RANK", cmd_rank)
  DEF_CMD (S_DATA, 0, "REGRESSION", cmd_regression)
+DEF_CMD (S_DATA, 0, "RELIABILITY", cmd_reliability)
  DEF_CMD (S_DATA, 0, "RENAME VARIABLES", cmd_rename_variables)
  DEF_CMD (S_DATA, 0, "SAMPLE", cmd_sample)
  DEF_CMD (S_DATA, 0, "SAVE", cmd_save)
@@ -231,7 +232,6 @@ UNIMPL_CMD ("RATIO STATISTICS", "Descriptives of ratios")
  UNIMPL_CMD ("READ MODEL", "Read new model")
  UNIMPL_CMD ("RECORD TYPE", "Defines a type of record within FILE TYPE")
  UNIMPL_CMD ("REFORMAT", "Read obsolete files")
-UNIMPL_CMD ("RELIABILITY", "Reliability estimates")
  UNIMPL_CMD ("REPEATING DATA", "Specify multiple cases per input record")
  UNIMPL_CMD ("REPORT", "Pretty print working file")
  UNIMPL_CMD ("RESTORE", "Restore settings")
diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk

index d60cb0dbdb6a15a59fcd31312a147f6b4da15d9e..9981ed67c98442bd1fd8884850891080f7a62bf6 100644 (file)
--- a/src/language/stats/automake.mk
+++ b/src/language/stats/automake.mk
@@ -13,6 +13,7 @@ src_language_stats_built_sources = \
         src/language/stats/oneway.c \
         src/language/stats/rank.c \
         src/language/stats/regression.c \
+       src/language/stats/reliability.c \
         src/language/stats/t-test.c
  
  language_stats_sources = \
diff --git a/src/language/stats/reliability.q b/src/language/stats/reliability.q

new file mode 100644 (file)

index 0000000..edacdf1
--- /dev/null
+++ b/src/language/stats/reliability.q
@@ -0,0 +1,809 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2008 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "xalloc.h"
+#include "xmalloca.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+#include <data/variable.h>
+#include <data/dictionary.h>
+#include <data/procedure.h>
+#include <data/casereader.h>
+#include <data/casegrouper.h>
+#include <math/moments.h>
+#include <data/case.h>
+
+#include <language/command.h>
+
+#include <libpspp/message.h>
+
+#include <output/manager.h>
+#include <output/table.h>
+
+/* (headers) */
+
+/* (specification)
+   reliability (rel_):
+     *^variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
+     scale=custom;
+     missing=miss:!exclude/include;
+     model=custom;
+     method=covariance;
+     +summary[sum_]=total.
+*/
+/* (declarations) */
+/* (functions) */
+
+
+static int rel_custom_scale (struct lexer *lexer, struct dataset *ds,
+                     struct cmd_reliability *p, void *aux);
+
+static int rel_custom_model (struct lexer *, struct dataset *,
+                            struct cmd_reliability *, void *);
+
+int cmd_reliability (struct lexer *lexer, struct dataset *ds);
+
+/* One scale: a set of item variables, the moments accumulated for them
+   while the data is read, and the statistics derived from those moments. */
+struct cronbach
+{
+  const struct variable **items;  /* The variables which make up the scale */
+  size_t n_items;                 /* Number of elements in ITEMS (and in M) */
+  double alpha;                   /* Value of alpha () for this scale */
+  double sum_of_variances;        /* Sum of the variances of the items */
+  double variance_of_sums;        /* Variance of the per-case item totals */
+  int totals_idx;          /* Casereader index into the totals */
+
+  struct moments1 **m ;    /* Moments of the items */
+  struct moments1 *total ; /* Moments of the totals */
+};
+
+#if 0
+/* Debugging aid (normally compiled out): prints the item names, the totals
+   index, the variance of the sums and alpha of scale S on stdout. */
+static void
+dump_cronbach (const struct cronbach *s)
+{
+  size_t i;
+
+  /* n_items is a size_t, so it needs %zu rather than %d. */
+  printf ("N items %zu\n", s->n_items);
+  for (i = 0 ; i < s->n_items; ++i)
+    {
+      printf ("%s\n", var_get_name (s->items[i]));
+    }
+
+  printf ("Totals idx %d\n", s->totals_idx);
+
+  printf ("scale variance %g\n", s->variance_of_sums);
+  printf ("alpha %g\n", s->alpha);
+  putchar ('\n');
+}
+#endif
+
+/* The kind of analysis requested by the MODEL subcommand.
+   The values are also used as indexes into the rol[] table. */
+enum model
+  {
+    MODEL_ALPHA,    /* Cronbach's Alpha for the whole scale */
+    MODEL_SPLIT     /* Split-half analysis */
+  };
+
+
+/* The state of one RELIABILITY command.
+   NOTE: cmd_reliability initialises this structure positionally, so the
+   order of the members must not be changed without updating it. */
+struct reliability
+{
+  const struct variable **variables;  /* Variables from the VARIABLES subcommand */
+  int n_variables;                    /* Number of elements in VARIABLES */
+  enum mv_class exclude;              /* Class of missing values filtered out of the data */
+
+  struct cronbach *sc;                /* Array of scales; sc[0] is the scale under analysis */
+  int n_sc;                           /* Number of elements in SC */
+
+  int total_start;                    /* Index in SC of the first "item deleted" scale */
+
+  struct string scale_name;           /* Name shown in the "Scale: ..." banner */
+
+  enum model model;                   /* Requested analysis */
+  int split_point;                    /* Items in the first half for MODEL_SPLIT;
+                                         -1 means half of the items */
+};
+
+
+/* Returns Cronbach's Alpha for a scale of K items, given the sum of the
+   variances of the individual items and the variance of the item totals. */
+static double
+alpha (int k, double sum_of_variances, double variance_of_sums)
+{
+  const double length_factor = k / (k - 1.0);
+  const double variance_ratio = sum_of_variances / variance_of_sums;
+
+  return length_factor * (1 - variance_ratio);
+}
+
+static void reliability_summary_total (const struct reliability *rel);
+
+static void reliability_statistics (const struct reliability *rel);
+
+
+
+static void
+run_reliability (struct casereader *group, struct dataset *ds,
+                struct reliability *rel);
+
+
+/* Destroys the moments held by scale C and resets the pointers to NULL, so
+   that the scale can be accumulated afresh, or released a second time,
+   without harm. */
+static void
+cronbach_release_moments (struct cronbach *c)
+{
+  if (c->total != NULL)
+    moments1_destroy (c->total);
+  c->total = NULL;
+
+  if (c->m != NULL)
+    {
+      size_t x;
+      for (x = 0 ; x < c->n_items; ++x)
+        moments1_destroy (c->m[x]);
+      free (c->m);
+    }
+  c->m = NULL;
+}
+
+/* Parses and executes the RELIABILITY command.
+
+   Builds the array of scales to be analysed: sc[0] is the scale itself,
+   followed by the two halves when the model is SPLIT, followed by one
+   "item deleted" scale per item when SUMMARY=TOTAL was requested.  Then
+   runs the analysis once per split-file group.
+
+   Returns CMD_SUCCESS if everything succeeded, otherwise CMD_FAILURE. */
+int
+cmd_reliability (struct lexer *lexer, struct dataset *ds)
+{
+  int i;
+  bool ok = false;
+  struct casegrouper *grouper;
+  struct casereader *group;
+  struct cmd_reliability cmd;
+
+  struct reliability rel = {
+    NULL, 0, MV_ANY, NULL, 0, -1,
+    DS_EMPTY_INITIALIZER,
+    MODEL_ALPHA, 0};
+
+  cmd.v_variables = NULL;
+
+  if ( ! parse_reliability (lexer, ds, &cmd, &rel) )
+    {
+      goto done;
+    }
+
+  rel.variables = cmd.v_variables;
+  rel.n_variables = cmd.n_variables;
+  rel.exclude = MV_ANY;
+
+
+  if (NULL == rel.sc)
+    {
+      size_t v;
+      struct cronbach *c;
+      /* Create a default Scale */
+
+      rel.n_sc = 1;
+      rel.sc = xzalloc (sizeof (struct cronbach) * rel.n_sc);
+
+      ds_init_cstr (&rel.scale_name, "ANY");
+
+      c = &rel.sc[0];
+      c->n_items = cmd.n_variables;
+      c->items = xzalloc (sizeof (struct variable*) * c->n_items);
+
+      for (v = 0 ; v < c->n_items ; ++v)
+        c->items[v] = cmd.v_variables[v];
+    }
+
+  if ( cmd.miss == REL_INCLUDE)
+    rel.exclude = MV_SYSTEM;
+
+  if ( rel.model == MODEL_SPLIT)
+    {
+      size_t v;
+      size_t n_first;
+      const size_t n_all = rel.sc[0].n_items;
+      const struct cronbach *s;
+
+      if (rel.split_point == -1)
+        n_first = n_all / 2;
+      else if (rel.split_point < 1 || (size_t) rel.split_point >= n_all)
+        {
+          /* An out of range split point would make the size of the second
+             half wrap around and the copy below overrun the item array. */
+          msg (SE, _("The split point must be at least 1 and less than "
+                     "the number of variables in the scale."));
+          free_reliability (&cmd);
+          goto done;
+        }
+      else
+        n_first = rel.split_point;
+
+      rel.n_sc += 2 ;
+      rel.sc = xrealloc (rel.sc, sizeof (struct cronbach) * rel.n_sc);
+
+      s = &rel.sc[0];
+
+      /* xrealloc does not clear the new elements.  The cleanup code
+         relies upon the moments pointers being either valid or NULL. */
+      rel.sc[1].m = rel.sc[2].m = NULL;
+      rel.sc[1].total = rel.sc[2].total = NULL;
+
+      rel.sc[1].n_items = n_first;
+      rel.sc[2].n_items = n_all - n_first;
+
+      rel.sc[1].items = xzalloc (sizeof (struct variable *)
+                                 * rel.sc[1].n_items);
+
+      rel.sc[2].items = xzalloc (sizeof (struct variable *) *
+                                 rel.sc[2].n_items);
+
+      for (v = 0; v < n_first ; ++v)
+        rel.sc[1].items[v] = s->items[v];
+
+      for (; v < n_all ; ++v)
+        rel.sc[2].items[v - n_first] = s->items[v];
+    }
+
+  if (cmd.a_summary[REL_SUM_TOTAL])
+    {
+      int item;
+      const int base_sc = rel.n_sc;
+      const int n_items = rel.sc[0].n_items;
+
+      rel.total_start = base_sc;
+
+      rel.n_sc += n_items;
+      rel.sc = xrealloc (rel.sc, sizeof (struct cronbach) * rel.n_sc);
+
+      /* One extra scale per item: the whole scale with that item removed. */
+      for (item = 0 ; item < n_items; ++item )
+        {
+          int v_src;
+          int v_dest = 0;
+          struct cronbach *s = &rel.sc[item + base_sc];
+
+          /* See above: the new elements are not cleared by xrealloc. */
+          s->m = NULL;
+          s->total = NULL;
+
+          s->n_items = n_items - 1;
+          s->items = xzalloc (sizeof (struct variable *) * s->n_items);
+          for (v_src = 0 ; v_src < n_items ; ++v_src)
+            {
+              if ( v_src != item)
+                s->items[v_dest++] = rel.sc[0].items[v_src];
+            }
+        }
+    }
+
+  /* Data pass. */
+  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
+  while (casegrouper_get_next_group (grouper, &group))
+    {
+      run_reliability (group, ds, &rel);
+
+      reliability_statistics (&rel);
+
+      if (cmd.a_summary[REL_SUM_TOTAL])
+        reliability_summary_total (&rel);
+
+      /* run_reliability allocates new moments for every group, so those
+         of this group must be released before the next one is read. */
+      for (i = 0 ; i < rel.n_sc; ++i)
+        cronbach_release_moments (&rel.sc[i]);
+    }
+  ok = casegrouper_destroy (grouper);
+  ok = proc_commit (ds) && ok;
+
+  free_reliability (&cmd);
+
+ done:
+
+  /* Free all the stuff */
+  for (i = 0 ; i < rel.n_sc; ++i)
+    {
+      struct cronbach *c = &rel.sc[i];
+
+      cronbach_release_moments (c);
+      free (c->items);
+    }
+
+  ds_destroy (&rel.scale_name);
+  free (rel.sc);
+
+  if (ok)
+    return CMD_SUCCESS;
+
+  return CMD_FAILURE;
+}
+
+/* Returns the sum, for case C, of all the item variables of the scale
+   passed in AUX (a struct cronbach).  The case number N is not used.
+   Intended for use as the callback of casereader_create_append_numeric. */
+static  double
+append_sum (const struct ccase *c, casenumber n UNUSED, void *aux)
+{
+  double sum = 0;
+  const struct cronbach *s = aux;
+
+  size_t v;
+  for (v = 0 ; v < s->n_items; ++v)
+    {
+      sum += case_data (c, s->items[v])->f;
+    }
+
+  return sum;
+}
+
+
+static void case_processing_summary (casenumber n_valid, casenumber n_missing);
+
+/* Analyses the cases of INPUT, which is one split-file group.
+
+   For every scale in REL, accumulates the moments of each item and of the
+   per-case item total, then derives the scale's sum of variances, variance
+   of sums and alpha.  Finally submits the "Scale: ..." banner and the case
+   processing summary.
+
+   INPUT is consumed: it is destroyed before the function returns.  The
+   moments allocated here are stored in REL->sc[] and are not freed by this
+   function. */
+static void
+run_reliability (struct casereader *input, struct dataset *ds UNUSED,
+                 struct reliability *rel)
+{
+  size_t i;
+  int si;
+  struct ccase c;
+  casenumber n_missing = 0;
+  casenumber n_valid = 0;
+
+
+  for (si = 0 ; si < rel->n_sc; ++si)
+    {
+      struct cronbach *s = &rel->sc[si];
+
+      /* The array holds one moments pointer per item, so the element size
+         is that of *s->m, not of the array pointer itself. */
+      s->m = xzalloc (sizeof *s->m * s->n_items);
+      s->total = moments1_create (MOMENT_VARIANCE);
+
+      for (i = 0 ; i < s->n_items ; ++i )
+        s->m[i] = moments1_create (MOMENT_VARIANCE);
+    }
+
+  /* Drop the cases which have missing values in any analysis variable. */
+  input = casereader_create_filter_missing (input,
+                                            rel->variables,
+                                            rel->n_variables,
+                                            rel->exclude,
+                                            &n_missing,
+                                            NULL);
+
+  /* Append to each case one extra value per scale: the sum of the scale's
+     items.  totals_idx records where that value is to be found. */
+  for (si = 0 ; si < rel->n_sc; ++si)
+    {
+      struct cronbach *s = &rel->sc[si];
+
+
+      s->totals_idx = casereader_get_value_cnt (input);
+      input =
+        casereader_create_append_numeric (input, append_sum,
+                                          s, NULL);
+    }
+
+  for (; casereader_read (input, &c); case_destroy (&c))
+    {
+      double weight = 1.0;
+      n_valid ++;
+
+      for (si = 0; si < rel->n_sc; ++si)
+        {
+          struct cronbach *s = &rel->sc[si];
+
+          for (i = 0 ; i < s->n_items ; ++i )
+            moments1_add (s->m[i], case_data (&c, s->items[i])->f, weight);
+
+          moments1_add (s->total, case_data_idx (&c, s->totals_idx)->f, weight);
+        }
+    }
+  casereader_destroy (input);
+
+  for (si = 0; si < rel->n_sc; ++si)
+    {
+      struct cronbach *s = &rel->sc[si];
+
+      s->sum_of_variances = 0;
+      for (i = 0 ; i < s->n_items ; ++i )
+        {
+          double weight, mean, variance;
+          moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL);
+
+          s->sum_of_variances += variance;
+        }
+
+      moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums,
+                          NULL, NULL);
+
+      s->alpha =
+        alpha (s->n_items, s->sum_of_variances, s->variance_of_sums);
+    }
+
+
+  {
+    struct tab_table *tab = tab_create(1, 1, 0);
+
+    tab_dim (tab, tab_natural_dimensions);
+    tab_flags (tab, SOMF_NO_TITLE );
+
+    tab_text(tab, 0, 0, TAT_PRINTF, "Scale: %s", ds_cstr (&rel->scale_name));
+
+    tab_submit(tab);
+  }
+
+
+  case_processing_summary (n_valid, n_missing);
+}
+
+
+static void reliability_statistics_model_alpha (struct tab_table *tbl,
+                                               const struct reliability *rel);
+
+static void reliability_statistics_model_split (struct tab_table *tbl,
+                                               const struct reliability *rel);
+
+/* The layout of the "Reliability Statistics" table for one model. */
+struct reliability_output_table
+{
+  int n_cols;         /* Total number of columns */
+  int n_rows;         /* Total number of rows */
+  int heading_cols;   /* Number of heading columns on the left */
+  int heading_rows;   /* Number of heading rows at the top */
+  /* Function which fills in the contents of the table */
+  void (*populate)(struct tab_table *, const struct reliability *);
+};
+
+/* Table layouts, indexed by enum model (MODEL_ALPHA, then MODEL_SPLIT). */
+static struct reliability_output_table rol[2] =
+  {
+    { 2, 2, 1, 1, reliability_statistics_model_alpha},
+    { 4, 9, 3, 0, reliability_statistics_model_split}
+  };
+
+/* Creates, fills in and submits the "Reliability Statistics" table, using
+   the layout and the populating function which rol[] holds for REL's model. */
+static void
+reliability_statistics (const struct reliability *rel)
+{
+  const struct reliability_output_table *layout = &rol[rel->model];
+  const int last_col = layout->n_cols - 1;
+  const int last_row = layout->n_rows - 1;
+
+  struct tab_table *tbl = tab_create (layout->n_cols, layout->n_rows, 0);
+
+  tab_headers (tbl, layout->heading_cols, 0, layout->heading_rows, 0);
+  tab_dim (tbl, tab_natural_dimensions);
+  tab_title (tbl, _("Reliability Statistics"));
+
+  /* Vertical lines between the data columns only. */
+  tab_box (tbl, -1, -1, -1, TAL_1,
+           layout->heading_cols, 0, last_col, last_row);
+
+  /* Frame around the whole table. */
+  tab_box (tbl, TAL_2, TAL_2, -1, -1,
+           0, 0, last_col, last_row);
+
+  /* Rules which separate the headings from the data. */
+  tab_hline (tbl, TAL_2, 0, last_col, layout->heading_rows);
+  tab_vline (tbl, TAL_2, layout->heading_cols, 0, last_row);
+
+  layout->populate (tbl, rel);
+
+  tab_submit (tbl);
+}
+
+/* Creates and submits the "Item-Total Statistics" table.  There is one row
+   per item of the scale REL->sc[0]; the row for item I is computed from the
+   "item deleted" scale REL->sc[REL->total_start + I]. */
+static void
+reliability_summary_total (const struct reliability *rel)
+{
+  enum { N_COLS = 5, HEADING_COLS = 1, HEADING_ROWS = 1 };
+
+  /* Titles of the data columns, from left to right. */
+  static const char *const column_titles[N_COLS - HEADING_COLS] =
+    {
+      N_("Scale Mean if Item Deleted"),
+      N_("Scale Variance if Item Deleted"),
+      N_("Corrected Item-Total Correlation"),
+      N_("Cronbach's Alpha if Item Deleted")
+    };
+
+  const struct cronbach *whole = &rel->sc[0];
+  const int n_rows = whole->n_items + HEADING_ROWS ;
+  int col;
+  int item;
+
+  struct tab_table *tbl = tab_create (N_COLS, n_rows, 0);
+
+  tab_headers (tbl, HEADING_COLS, 0, HEADING_ROWS, 0);
+  tab_dim (tbl, tab_natural_dimensions);
+  tab_title (tbl, _("Item-Total Statistics"));
+
+  /* Vertical lines between the data columns only. */
+  tab_box (tbl, -1, -1, -1, TAL_1,
+           HEADING_COLS, 0, N_COLS - 1, n_rows - 1);
+
+  /* Frame around the whole table. */
+  tab_box (tbl, TAL_2, TAL_2, -1, -1,
+           0, 0, N_COLS - 1, n_rows - 1);
+
+  tab_hline (tbl, TAL_2, 0, N_COLS - 1, HEADING_ROWS);
+  tab_vline (tbl, TAL_2, HEADING_COLS, 0, n_rows - 1);
+
+  for (col = HEADING_COLS; col < N_COLS; ++col)
+    tab_text (tbl, col, 0, TAB_CENTER | TAT_TITLE,
+              _(column_titles[col - HEADING_COLS]));
+
+  for (item = 0 ; item < whole->n_items; ++item)
+    {
+      const int row = HEADING_ROWS + item;
+      const struct cronbach *deleted = &rel->sc[rel->total_start + item];
+
+      double n, scale_mean, scale_var;
+      double item_mean, item_var;
+      double covariance, item_to_total_r;
+
+      tab_text (tbl, 0, row, TAB_LEFT| TAT_TITLE,
+                var_to_string (whole->items[item]));
+
+      /* Statistics of the scale from which this item has been removed. */
+      moments1_calculate (deleted->total, &n, &scale_mean, &scale_var, 0, 0);
+
+      tab_float (tbl, 1, row, TAB_RIGHT, scale_mean, 8, 3);
+      tab_float (tbl, 2, row, TAB_RIGHT, deleted->variance_of_sums, 8, 3);
+      tab_float (tbl, 4, row, TAB_RIGHT, deleted->alpha, 8, 3);
+
+      /* Covariance of the item with the total of the whole scale, from
+         which the correlation of the item with the total of the
+         remaining items is derived. */
+      moments1_calculate (whole->m[item], &n, &item_mean, &item_var, 0,0);
+
+      covariance = whole->variance_of_sums + item_var
+        - deleted->variance_of_sums;
+      covariance /= 2.0;
+
+      item_to_total_r = (covariance - item_var)
+        / (sqrt(item_var) * sqrt (deleted->variance_of_sums));
+
+      tab_float (tbl, 3, row, TAB_RIGHT, item_to_total_r, 8, 3);
+    }
+
+  tab_submit (tbl);
+}
+
+
+/* Fills in TBL, the "Reliability Statistics" table for the ALPHA model:
+   a heading row followed by the alpha and the number of items of the
+   scale REL->sc[0]. */
+static void
+reliability_statistics_model_alpha (struct tab_table *tbl,
+                                    const struct reliability *rel)
+{
+  enum { ALPHA_COL = 0, COUNT_COL = 1, TITLE_ROW = 0, VALUE_ROW = 1 };
+
+  const struct cronbach *scale = &rel->sc[0];
+
+  /* Headings. */
+  tab_text (tbl, ALPHA_COL, TITLE_ROW, TAB_CENTER | TAT_TITLE,
+            _("Cronbach's Alpha"));
+  tab_text (tbl, COUNT_COL, TITLE_ROW, TAB_CENTER | TAT_TITLE,
+            _("N of items"));
+
+  /* Values. */
+  tab_float (tbl, ALPHA_COL, VALUE_ROW, TAB_RIGHT, scale->alpha, 8, 3);
+  tab_float (tbl, COUNT_COL, VALUE_ROW, TAB_RIGHT, scale->n_items, 8, 0);
+}
+
+
+/* Fills in TBL, the "Reliability Statistics" table for the SPLIT model.
+   REL->sc[0] is the whole scale; REL->sc[1] and REL->sc[2] are its two
+   parts. */
+static void
+reliability_statistics_model_split (struct tab_table *tbl,
+                                    const struct reliability *rel)
+{
+  /* The row labels, with the cell in which each one belongs. */
+  static const struct
+    {
+      int col;
+      int row;
+      const char *text;
+    }
+  labels[] =
+    {
+      {0, 0, N_("Cronbach's Alpha")},
+      {1, 0, N_("Part 1")},
+      {2, 0, N_("Value")},
+      {2, 1, N_("N of Items")},
+      {1, 2, N_("Part 2")},
+      {2, 2, N_("Value")},
+      {2, 3, N_("N of Items")},
+      {1, 4, N_("Total N of Items")},
+      {0, 5, N_("Correlation Between Forms")},
+      {0, 6, N_("Spearman-Brown Coefficient")},
+      {1, 6, N_("Equal Length")},
+      {1, 7, N_("Unequal Length")},
+      {0, 8, N_("Guttman Split-Half Coefficient")}
+    };
+
+  const int value_col = 3;
+
+  const struct cronbach *whole = &rel->sc[0];
+  const struct cronbach *part1 = &rel->sc[1];
+  const struct cronbach *part2 = &rel->sc[2];
+
+  double r, guttman, tmp, unequal_length;
+  size_t l;
+
+  for (l = 0; l < sizeof labels / sizeof labels[0]; ++l)
+    tab_text (tbl, labels[l].col, labels[l].row, TAB_LEFT,
+              _(labels[l].text));
+
+  tab_float (tbl, value_col, 0, TAB_RIGHT, part1->alpha, 8, 3);
+  tab_float (tbl, value_col, 2, TAB_RIGHT, part2->alpha, 8, 3);
+
+  tab_float (tbl, value_col, 1, TAB_RIGHT, part1->n_items, 8, 0);
+  tab_float (tbl, value_col, 3, TAB_RIGHT, part2->n_items, 8, 0);
+
+  tab_float (tbl, value_col, 4, TAB_RIGHT,
+             part1->n_items + part2->n_items, 8, 0);
+
+  /* To begin with, R holds twice the covariance of the two parts ... */
+  r = whole->variance_of_sums -
+    part1->variance_of_sums -
+    part2->variance_of_sums ;
+
+  /* ... which is what the Guttman Split Half Coefficient is based upon. */
+  guttman = 2 * r / whole->variance_of_sums;
+
+  /* Now turn R into the correlation between the two parts. */
+  r /= sqrt (part1->variance_of_sums);
+  r /= sqrt (part2->variance_of_sums);
+  r /= 2.0;
+
+  tab_float (tbl, value_col, 5, TAB_RIGHT, r, 8, 3);
+
+  /* Equal length Spearman-Brown Coefficient */
+  tab_float (tbl, value_col, 6, TAB_RIGHT, 2 * r / (1.0 + r), 8, 3);
+
+  tab_float (tbl, value_col, 8, TAB_RIGHT, guttman, 8, 3);
+
+  /* Unequal Length Spearman Brown Coefficient.  TMP is an intermediate
+     value used in the computation thereof. */
+  tmp = (1.0 - r*r) * part1->n_items * part2->n_items /
+    SQR (whole->n_items);
+
+  unequal_length = sqrt( SQR (SQR (r)) + 4 * SQR (r) * tmp);
+  unequal_length -= SQR (r);
+  unequal_length /= 2 * tmp;
+
+  tab_float (tbl, value_col, 7, TAB_RIGHT, unequal_length, 8, 3);
+}
+
+
+
+/* Creates and submits the "Case Processing Summary" table, which shows
+   N_VALID, N_MISSING and their sum, both as counts and as percentages of
+   that sum. */
+static void
+case_processing_summary (casenumber n_valid, casenumber n_missing)
+{
+  enum { N_COLS = 4, N_ROWS = 4, HEADING_COLS = 2, HEADING_ROWS = 1 };
+  enum { N_DATA_ROWS = 3 };
+
+  /* Labels of the data rows, from top to bottom. */
+  static const char *const row_labels[N_DATA_ROWS] =
+    {
+      N_("Valid"),
+      N_("Excluded"),
+      N_("Total")
+    };
+
+  const casenumber total = n_missing + n_valid;
+  casenumber counts[N_DATA_ROWS];
+  int r;
+
+  struct tab_table *tbl = tab_create (N_COLS, N_ROWS, 0);
+
+  counts[0] = n_valid;
+  counts[1] = n_missing;
+  counts[2] = total;
+
+  tab_headers (tbl, HEADING_COLS, 0, HEADING_ROWS, 0);
+  tab_dim (tbl, tab_natural_dimensions);
+  tab_title (tbl, _("Case Processing Summary"));
+
+  /* Vertical lines between the data columns only. */
+  tab_box (tbl, -1, -1, -1, TAL_1,
+           HEADING_COLS, 0, N_COLS - 1, N_ROWS - 1);
+
+  /* Frame around the whole table. */
+  tab_box (tbl, TAL_2, TAL_2, -1, -1,
+           0, 0, N_COLS - 1, N_ROWS - 1);
+
+  tab_hline (tbl, TAL_2, 0, N_COLS - 1, HEADING_ROWS);
+  tab_vline (tbl, TAL_2, HEADING_COLS, 0, N_ROWS - 1);
+
+  /* Column headings. */
+  tab_text (tbl, HEADING_COLS, 0, TAB_CENTER | TAT_TITLE,
+            _("N"));
+  tab_text (tbl, HEADING_COLS + 1, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF,
+            _("%%"));
+
+  /* Row headings, counts and percentages. */
+  tab_text (tbl, 0, HEADING_ROWS, TAB_LEFT | TAT_TITLE,
+            _("Cases"));
+
+  for (r = 0; r < N_DATA_ROWS; ++r)
+    {
+      const int row = HEADING_ROWS + r;
+
+      tab_text (tbl, 1, row, TAB_LEFT | TAT_TITLE, _(row_labels[r]));
+      tab_float (tbl, 2, row, TAB_RIGHT, counts[r], 8, 0);
+      tab_float (tbl, 3, row, TAB_RIGHT,
+                 100 * counts[r] / (double) total, 8, 1);
+    }
+
+  tab_submit (tbl);
+}
+
+/* Parses the setting of the MODEL subcommand, which is either ALPHA or
+   SPLIT, the latter optionally followed by a parenthesised number of
+   variables for the first part.  The result is stored in the struct
+   reliability passed in AUX; a split point of -1 means that no number was
+   given.
+
+   Returns 1 on success and 0 on failure. */
+static int
+rel_custom_model (struct lexer *lexer, struct dataset *ds UNUSED,
+                  struct cmd_reliability *cmd UNUSED, void *aux)
+{
+  struct reliability *rel = aux;
+
+  if (lex_match_id (lexer, "ALPHA"))
+    {
+      rel->model = MODEL_ALPHA;
+    }
+  else if (lex_match_id (lexer, "SPLIT"))
+    {
+      rel->model = MODEL_SPLIT;
+      rel->split_point = -1;
+      if ( lex_match (lexer, '('))
+        {
+          /* The token's value must not be read unless the token really
+             is a number. */
+          if ( ! lex_force_num (lexer)) return 0;
+          rel->split_point = lex_number (lexer);
+          lex_get (lexer);
+          if ( ! lex_force_match (lexer, ')')) return 0;
+        }
+    }
+  else
+    return 0;
+
+  return 1;
+}
+
+
+
+/* Parses the setting of the SCALE subcommand: a parenthesised string which
+   names the scale, an optional '=', and then a list of variables taken
+   from those of the VARIABLES subcommand (held in P).  The scale is stored
+   as the only element of the sc array of the struct reliability passed in
+   AUX.
+
+   Returns 1 on success, 0 if the name could not be parsed, and 2 if the
+   variable list could not be parsed. */
+static int
+rel_custom_scale (struct lexer *lexer, struct dataset *ds UNUSED,
+                  struct cmd_reliability *p, void *aux)
+{
+  struct const_var_set *vs;
+  struct reliability *rel = aux;
+  struct cronbach *scale;
+
+  /* If SCALE is given more than once, the last one wins.  Release what
+     the earlier one allocated instead of leaking it. */
+  if (rel->sc != NULL)
+    {
+      free (rel->sc[0].items);
+      free (rel->sc);
+    }
+
+  rel->n_sc = 1;
+  rel->sc = xzalloc (sizeof (struct cronbach) * rel->n_sc);
+  scale = &rel->sc[0];
+
+  if ( ! lex_force_match (lexer, '(')) return 0;
+
+  if ( ! lex_force_string (lexer) ) return 0;
+
+  /* Likewise release any name set by an earlier SCALE subcommand. */
+  ds_destroy (&rel->scale_name);
+  ds_init_string (&rel->scale_name, lex_tokstr (lexer));
+
+  lex_get (lexer);
+
+  if ( ! lex_force_match (lexer, ')')) return 0;
+
+  lex_match (lexer, '=');
+
+  vs = const_var_set_create_from_array (p->v_variables, p->n_variables);
+
+  if (!parse_const_var_set_vars (lexer, vs, &scale->items, &scale->n_items, 0))
+    {
+      const_var_set_destroy (vs);
+      return 2;
+    }
+
+  const_var_set_destroy (vs);
+  return 1;
+}
+
+/*
+   Local Variables:
+   mode: c
+   End:
+*/
diff --git a/src/libpspp/misc.h b/src/libpspp/misc.h

index 3b02515709ca9969c8d67eab5c9c2210e4822d1e..f3afb1fb91a88b8d91fd7835a1ab754d95fa06c4 100644 (file)
--- a/src/libpspp/misc.h
+++ b/src/libpspp/misc.h
@@ -35,6 +35,8 @@
  /* Rounds X down to the previous multiple of Y. */
  #define ROUND_DOWN(X, Y) ((X) / (Y) * (Y))
  
+/* Expands to the square of X.  X appears twice in the expansion, so it
+   must not be an expression with side effects. */
+#define SQR(X) ((X) * (X))
+
  int intlog10 (unsigned);
  
  /* Returns the square of X. */
diff --git a/tests/automake.mk b/tests/automake.mk

index 909dd7d406adecc2967e7df9a33772b2cdfa91b8..505c4335980bc3fec4c4ee55701e41f3bf3ff795 100644 (file)
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -50,6 +50,7 @@ dist_TESTS = \
         tests/command/rename.sh \
         tests/command/regression.sh \
         tests/command/regression-qr.sh \
+       tests/command/reliability.sh \
         tests/command/sample.sh \
         tests/command/sort.sh \
         tests/command/sysfiles.sh \
diff --git a/tests/command/reliability.sh b/tests/command/reliability.sh

new file mode 100755 (executable)

index 0000000..0bfa733
--- /dev/null
+++ b/tests/command/reliability.sh
@@ -0,0 +1,345 @@
+#!/bin/sh
+
+# This program tests the reliability command.
+
+TEMPDIR=/tmp/pspp-tst-$$
+TESTFILE=$TEMPDIR/`basename $0`.sps
+
+# ensure that top_srcdir and top_builddir  are absolute
+if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi
+if [ -z "$top_builddir" ] ; then top_builddir=. ; fi
+top_srcdir=`cd $top_srcdir; pwd`
+top_builddir=`cd $top_builddir; pwd`
+
+PSPP=$top_builddir/src/ui/terminal/pspp
+
+STAT_CONFIG_PATH=$top_srcdir/config
+export STAT_CONFIG_PATH
+
+LANG=C
+export LANG
+
+
+# Removes the scratch directory, unless PSPP_TEST_NO_CLEANUP is set to a
+# non-empty value, in which case the directory is kept for inspection.
+cleanup()
+{
+     if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then 
+       echo "NOT cleaning $TEMPDIR"
+       return ; 
+     fi
+     # Quoted, so that the path is always passed to rm as a single word.
+     rm -rf "$TEMPDIR"
+}
+
+
+# Reports that the current activity FAILED, cleans up and exits with
+# status 1.
+fail()
+{
+    # Quoted, so that the text is printed exactly as it was set.
+    echo "$activity"
+    echo FAILED
+    cleanup;
+    exit 1;
+}
+
+
+# Reports that the current activity gave NO RESULT, cleans up and exits with
+# status 2.
+no_result()
+{
+    # Quoted, so that the text is printed exactly as it was set.
+    echo "$activity"
+    echo NO RESULT;
+    cleanup;
+    exit 2;
+}
+
+# Cleans up and exits with status 0.
+pass()
+{
+    cleanup
+    exit 0
+}
+
+# Without the scratch directory the test would write its files into, and
+# compare output from, whatever directory it was started in; so give up
+# straight away if the directory cannot be created or entered.
+activity="create test directory"
+mkdir -p "$TEMPDIR" || no_result
+
+cd "$TEMPDIR" || no_result
+
+
+activity="create program"
+cat > $TESTFILE <<EOF
+
+data list notable list  /var1 *
+       var2  *
+       var6  *
+       var7  *
+       var8  *
+       var9  *
+       var11 *
+       var12 *
+       var15 *
+       var16 *
+       var17 *
+       var19 *
+       .
+
+begin data.
+6 7 7 5 7 7 7 7 7 7 6 6
+6 7 7 6 7 6 7 5 6 5 7 7
+6 6 7 6 5 3 6 4 5 6 4 5
+4 6 5 6 6 5 4 3 5 6 5 6
+5 6 5 5 6 5 4 4 6 6 5 5
+6 6 7 6 6 5 6 5 6 6 5 6
+5 6 6 5 6 5 5 4 6 5 5 5
+5 7 7 7 7 7 6 5 7 7 7 7
+6 6 6 5 5 7 6 5 6 6 5 6
+. . . . . . . . . . . .
+6 6 5 5 5 6 6 4 6 5 5 5
+7 7 7 6 7 6 7 6 6 6 7 6
+4 7 6 6 6 5 5 4 4 5 5 6
+5 6 3 5 4 1 4 6 2 3 3 2
+3 6 6 5 6 2 4 2 2 4 4 5
+6 6 7 5 6 5 7 6 5 6 6 5
+6 5 6 6 5 6 6 6 6 4 5 5
+5 7 7 . 6 6 6 5 6 6 6 6
+5 7 5 5 4 6 7 6 5 4 6 5
+7 7 7 6 7 7 7 6 7 7 7 6
+3 6 5 6 5 7 7 3 4 7 5 7
+6 7 7 6 5 6 5 5 6 6 6 6
+5 5 6 5 5 5 5 4 5 5 5 6
+6 6 7 4 5 6 6 6 6 5 5 6
+6 5 6 6 4 4 5 4 5 6 4 5
+5 6 7 6 6 7 7 5 6 6 6 5
+5 6 5 7 4 6 6 5 7 7 5 6
+. . . . . . . . . . . .
+7 6 6 5 6 6 7 6 6 5 5 6
+6 6 7 7 7 7 7 6 7 6 6 7
+7 5 5 . 5 . 7 3 5 4 5 3
+7 6 7 5 4 5 7 5 7 5 5 6
+6 5 6 6 6 5 5 5 5 6 5 6
+7 7 7 7 7 7 7 7 5 6 7 7
+. . . . . . . . . . . .
+5 5 6 7 5 6 6 4 6 6 6 5
+6 6 5 7 5 6 7 5 6 5 4 6
+7 6 7 6 7 5 6 7 7 6 6 6
+5 6 5 6 5 6 7 2 5 7 3 7
+6 6 5 6 5 6 6 6 6 6 5 6
+7 6 7 6 6 6 6 6 6 7 6 7
+7 7 6 5 6 6 7 7 7 4 6 5
+3 7 7 6 6 7 7 7 6 6 6 4
+3 5 3 4 3 3 3 3 3 3 3 5
+5 7 7 7 5 7 6 2 6 7 6 7
+7 7 7 7 7 7 7 6 7 7 7 6
+6 5 7 4 4 4 5 6 5 5 4 5
+4 7 7 4 4 3 6 3 5 3 4 5
+7 7 7 7 7 7 7 7 7 7 7 5
+3 6 5 5 4 5 4 4 5 5 3 5
+6 7 6 6 6 7 7 6 6 6 7 6
+2 5 4 6 3 2 2 2 2 7 2 2
+4 6 6 5 5 5 6 5 5 6 6 5
+5 7 4 5 6 6 6 5 6 6 5 6
+5 7 7 5 6 5 6 5 5 4 5 4
+4 5 6 5 6 4 5 5 5 4 5 5
+7 6 6 5 5 6 7 5 6 5 7 6
+5 6 6 5 4 5 5 3 4 5 5 5
+5 7 6 4 4 5 6 5 6 4 4 6
+6 6 6 6 5 7 7 6 5 5 6 6
+6 6 7 6 7 6 6 5 6 7 6 5
+7 6 7 6 7 6 7 7 5 5 6 6
+5 6 6 5 5 5 6 5 6 7 7 5
+5 6 6 5 6 5 6 6 6 6 6 6
+5 5 5 5 6 4 5 3 4 7 6 5
+5 7 7 6 6 6 6 5 6 7 6 7
+6 6 7 7 7 5 6 5 5 5 5 4
+2 7 5 4 6 5 5 2 5 6 4 6
+6 7 7 5 6 6 7 6 6 7 5 7
+5 6 7 6 6 3 5 7 6 6 5 6
+6 6 6 3 5 5 5 6 6 6 4 5
+4 7 7 4 7 4 5 5 5 7 4 4
+. . . . . . . . . . . .
+6 6 7 6 7 6 7 7 6 7 7 6
+. . . . . . . . . . . .
+5 6 5 7 6 5 6 6 5 6 4 6
+5 5 5 5 4 5 5 5 7 5 5 5
+6 6 6 4 5 4 6 6 6 4 5 4
+6 5 7 4 6 4 6 5 6 6 6 3
+5 7 6 5 5 5 5 5 6 7 6 6
+5 5 7 7 5 5 6 6 5 5 5 7
+5 6 7 6 7 5 6 4 6 7 6 7
+4 5 5 5 6 5 6 5 6 6 5 6
+6 5 5 5 6 3 4 5 5 4 5 3
+6 6 6 5 5 5 4 3 4 5 5 5
+6 7 7 6 2 3 6 6 6 5 7 7
+6 7 5 5 6 6 6 5 6 6 6 6
+6 7 7 6 7 7 7 5 5 6 6 6
+6 6 6 6 7 6 6 7 6 6 6 6
+5 6 6 6 3 5 6 6 5 5 4 6
+4 6 5 6 6 5 6 5 6 6 5 5
+6 4 6 5 4 6 7 4 5 6 5 5
+6 7 6 4 6 5 7 6 7 7 6 5
+6 7 7 6 7 6 7 7 7 6 6 6
+6 6 6 4 5 6 7 7 5 6 4 4
+3 3 5 3 3 1 5 6 3 2 3 3
+7 7 5 6 6 7 7 6 7 7 7 7
+5 6 6 6 7 5 4 5 4 7 6 7
+3 6 5 4 3 3 3 5 5 6 3 4
+5 7 6 4 6 5 5 6 6 7 5 6
+5 7 6 6 6 6 6 5 6 7 7 6
+7 7 5 6 7 7 7 7 6 5 7 7
+6 7 6 6 5 6 7 7 6 5 6 6
+6 7 7 7 7 6 6 7 6 7 7 7
+4 6 4 7 3 6 5 5 4 3 5 6
+5 5 7 5 4 6 7 5 4 6 6 5
+5 5 6 4 6 5 7 6 5 5 5 6
+. . . . . . . . . . . .
+. . . . . . . . . . . .
+5 7 7 5 6 6 7 7 6 6 6 7
+6 7 7 1 2 1 7 7 5 5 5 2
+. . . . . . . . . . . .
+3 7 4 6 4 7 4 6 4 7 4 7
+5 7 3 5 5 6 7 5 4 7 7 4
+4 7 7 5 4 6 7 7 6 5 4 4
+6 6 2 2 6 4 6 5 5 1 5 2
+5 5 6 4 5 4 6 5 5 6 5 5
+. . . . . . . . . . . .
+5 7 6 6 6 6 6 6 5 6 6 6
+6 6 6 5 6 6 6 6 7 5 6 7
+3 6 3 3 5 3 3 5 3 5 7 4
+4 4 6 3 3 3 4 3 4 2 3 6
+5 7 7 6 5 4 7 5 7 7 3 7
+4 5 4 4 4 4 3 3 3 4 3 3
+6 7 7 5 6 6 7 5 4 5 5 5
+3 5 3 3 1 3 4 3 4 7 6 7
+4 5 4 4 4 3 4 5 6 6 4 5
+5 6 3 4 5 3 5 3 4 5 6 4
+5 5 5 6 6 6 6 4 5 6 6 5
+6 7 7 2 2 6 7 7 7 7 5 7
+5 7 7 4 6 5 7 5 5 5 6 6
+6 6 7 7 5 5 5 7 6 7 7 7
+6 5 7 3 6 5 6 5 5 6 5 4
+5 7 6 5 6 6 6 5 6 5 5 6
+4 5 5 5 6 3 5 3 3 6 5 5
+. . . . . . . . . . . .
+5 6 6 4 4 4 5 3 5 5 2 6
+5 6 7 5 5 6 6 5 5 6 6 6
+6 7 7 6 4 7 7 6 7 5 6 7
+6 6 5 4 5 2 7 6 6 5 6 6
+2 2 2 2 2 2 3 2 3 1 1 2
+end data.
+
+RELIABILITY
+  /VARIABLES=var2 var8 var15 var17 var6
+  /SCALE('Everything') var6 var8 var15 var17
+  /MODEL=ALPHA.
+
+RELIABILITY
+  /VARIABLES=var6 var8 var15 var17
+  /SCALE('Nothing') ALL
+  /MODEL=SPLIT(2)
+ .
+
+RELIABILITY
+  /VARIABLES=var2 var6 var8 var15 var17 var19
+  /SCALE('Totals') var6 var8 var15 var17 
+  /SUMMARY = total
+ .
+
+
+RELIABILITY
+  /VARIABLES=var6 var8 var15 var17 
+  .
+
+EOF
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="run program"
+$SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE
+if [ $? -ne 0 ] ; then no_result ; fi
+
+activity="compare output"
+diff pspp.list - << EOF
+Scale: Everything
+
+1.1 RELIABILITY.  Case Processing Summary
+#==============#===#=====#
+#              # N |  %  #
+#==============#===#=====#
+#Cases Valid   #131| 92.9#
+#      Excluded# 10|  7.1#
+#      Total   #141|100.0#
+#==============#===#=====#
+
+1.2 RELIABILITY.  Reliability Statistics
+#================#==========#
+#Cronbach's Alpha#N of items#
+#================#==========#
+#            .748#         4#
+#================#==========#
+
+Scale: Nothing
+
+2.1 RELIABILITY.  Case Processing Summary
+#==============#===#=====#
+#              # N |  %  #
+#==============#===#=====#
+#Cases Valid   #131| 92.9#
+#      Excluded# 10|  7.1#
+#      Total   #141|100.0#
+#==============#===#=====#
+
+2.2 RELIABILITY.  Reliability Statistics
+#==========================================================#====#
+#Cronbach's Alpha               Part 1           Value     #.550#
+#                                                N of Items#   2#
+#                               Part 2           Value     #.631#
+#                                                N of Items#   2#
+#                               Total N of Items           #   4#
+#Correlation Between Forms                                 #.606#
+#Spearman-Brown Coefficient     Equal Length               #.755#
+#                               Unequal Length             #.755#
+#Guttman Split-Half Coefficient                            #.754#
+#==========================================================#====#
+
+Scale: Totals
+
+3.1 RELIABILITY.  Case Processing Summary
+#==============#===#=====#
+#              # N |  %  #
+#==============#===#=====#
+#Cases Valid   #131| 92.9#
+#      Excluded# 10|  7.1#
+#      Total   #141|100.0#
+#==============#===#=====#
+
+3.2 RELIABILITY.  Reliability Statistics
+#================#==========#
+#Cronbach's Alpha#N of items#
+#================#==========#
+#            .748#         4#
+#================#==========#
+
+3.3 RELIABILITY.  Item-Total Statistics
+#=====#==========================#==============================#================================#================================#
+#     #Scale Mean if Item Deleted|Scale Variance if Item Deleted|Corrected Item-Total Correlation|Cronbach's Alpha if Item Deleted#
+#=====#==========================#==============================#================================#================================#
+#var6 #                    15.969|                         8.430|                            .513|                            .705#
+#var8 #                    16.565|                         7.863|                            .530|                            .698#
+#var15#                    16.473|                         8.451|                            .558|                            .682#
+#var17#                    16.603|                         7.995|                            .570|                            .673#
+#=====#==========================#==============================#================================#================================#
+
+Scale: ANY
+
+4.1 RELIABILITY.  Case Processing Summary
+#==============#===#=====#
+#              # N |  %  #
+#==============#===#=====#
+#Cases Valid   #131| 92.9#
+#      Excluded# 10|  7.1#
+#      Total   #141|100.0#
+#==============#===#=====#
+
+4.2 RELIABILITY.  Reliability Statistics
+#================#==========#
+#Cronbach's Alpha#N of items#
+#================#==========#
+#            .748#         4#
+#================#==========#
+
+EOF
+if [ $? -ne 0 ] ; then fail ; fi
+
+
+pass;
author	John Darrington <john@marilyn.intra>
	Fri, 12 Sep 2008 23:17:28 +0000 (07:17 +0800)
committer	John Darrington <john@marilyn.intra>
	Fri, 12 Sep 2008 23:17:28 +0000 (07:17 +0800)
doc/statistics.texi		patch \| blob \| history
src/language/command.def		patch \| blob \| history
src/language/stats/automake.mk		patch \| blob \| history
src/language/stats/reliability.q	[new file with mode: 0644]	patch \| blob
src/libpspp/misc.h		patch \| blob \| history
tests/automake.mk		patch \| blob \| history
tests/command/reliability.sh	[new file with mode: 0755]	patch \| blob