Re-implemented the T-TEST command and the levene calculation.

author John Darrington <john@darrington.wattle.id.au>

Fri, 1 Jul 2011 14:47:33 +0000 (16:47 +0200)

committer John Darrington <john@darrington.wattle.id.au>

Fri, 1 Jul 2011 14:47:33 +0000 (16:47 +0200)
author John Darrington <john@darrington.wattle.id.au>
Fri, 1 Jul 2011 14:47:33 +0000 (16:47 +0200)
committer John Darrington <john@darrington.wattle.id.au>
Fri, 1 Jul 2011 14:47:33 +0000 (16:47 +0200)
diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk

index 4d4b662cad6b91d53e0e5323f2e937fd87f6d7ff..f7fd63737f77beffb30a432f16f6c829cfdcf731 100644 (file)
--- a/src/language/stats/automake.mk
+++ b/src/language/stats/automake.mk
@@ -7,8 +7,7 @@ src_language_stats_built_sources = \
         src/language/stats/examine.c \
         src/language/stats/frequencies.c \
         src/language/stats/rank.c \
-       src/language/stats/regression.c \
-       src/language/stats/t-test.c
+       src/language/stats/regression.c
  
  language_stats_sources = \
         src/language/stats/aggregate.c \
@@ -49,6 +48,11 @@ language_stats_sources = \
         src/language/stats/sort-cases.c \
         src/language/stats/sort-criteria.c \
         src/language/stats/sort-criteria.h \
+       src/language/stats/t-test.h \
+       src/language/stats/t-test-indep.c \
+       src/language/stats/t-test-one-sample.c \
+       src/language/stats/t-test-paired.c \
+       src/language/stats/t-test-parser.c \
         src/language/stats/wilcoxon.c \
         src/language/stats/wilcoxon.h
  
diff --git a/src/language/stats/oneway.c b/src/language/stats/oneway.c

index 9d4fdb8af1cb55c0f01740b33c72a0892f7ec53a..8e2805b3b1d54ca618b91d2b23889d10a84cdd75 100644 (file)
--- a/src/language/stats/oneway.c
+++ b/src/language/stats/oneway.c
@@ -91,7 +91,6 @@ struct oneway_spec
  
    /* The weight variable */
    const struct variable *wv;
-
  };
  
  /* Per category data */
@@ -109,6 +108,7 @@ struct per_var_ws
  {
    struct categoricals *cat;
    struct covariance *cov;
+  struct levene *nl;
  
    double sst;
    double sse;
@@ -117,7 +117,6 @@ struct per_var_ws
    int n_groups;
  
    double mse;
-  double levene_w;
  };
  
  struct oneway_workspace
@@ -393,6 +392,7 @@ run_oneway (const struct oneway_spec *cmd,
        ws.vws[v].cov = covariance_2pass_create (1, &cmd->vars[v],
                                                ws.vws[v].cat, 
                                                cmd->wv, cmd->exclude);
+      ws.vws[v].nl = levene_create (var_get_width (cmd->indep_var), NULL);
      }
  
    c = casereader_peek (input, 0);
@@ -413,20 +413,11 @@ run_oneway (const struct oneway_spec *cmd,
                                                cmd->exclude, NULL, NULL);
    input = casereader_create_filter_weight (input, dict, NULL, NULL);
  
-
-  if (cmd->stats & STATS_HOMOGENEITY)
-    for (v = 0; v < cmd->n_vars; ++v)
-      {
-       struct per_var_ws *pvw = &ws.vws[v];
-
-       pvw->levene_w = levene (input, cmd->indep_var, cmd->vars[v], cmd->wv, cmd->exclude);
-      }
-
    reader = casereader_clone (input);
-
    for (; (c = casereader_read (reader)) != NULL; case_unref (c))
      {
        int i;
+      double w = dict_get_case_weight (dict, c, NULL);
  
        for (i = 0; i < cmd->n_vars; ++i)
         {
@@ -441,13 +432,16 @@ run_oneway (const struct oneway_spec *cmd,
             }
  
           covariance_accumulate_pass1 (pvw->cov, c);
+         levene_pass_one (pvw->nl, val->f, w, case_data (c, cmd->indep_var));
         }
      }
    casereader_destroy (reader);
+
    reader = casereader_clone (input);
    for ( ; (c = casereader_read (reader) ); case_unref (c))
      {
        int i;
+      double w = dict_get_case_weight (dict, c, NULL);
        for (i = 0; i < cmd->n_vars; ++i)
         {
           struct per_var_ws *pvw = &ws.vws[i];
@@ -461,10 +455,35 @@ run_oneway (const struct oneway_spec *cmd,
             }
  
           covariance_accumulate_pass2 (pvw->cov, c);
+         levene_pass_two (pvw->nl, val->f, w, case_data (c, cmd->indep_var));
         }
      }
    casereader_destroy (reader);
  
+  reader = casereader_clone (input);
+  for ( ; (c = casereader_read (reader) ); case_unref (c))
+    {
+      int i;
+      double w = dict_get_case_weight (dict, c, NULL);
+
+      for (i = 0; i < cmd->n_vars; ++i)
+       {
+         struct per_var_ws *pvw = &ws.vws[i];
+         const struct variable *v = cmd->vars[i];
+         const union value *val = case_data (c, v);
+
+         if ( MISS_ANALYSIS == cmd->missing_type)
+           {
+             if ( var_is_value_missing (v, val, cmd->exclude))
+               continue;
+           }
+
+         levene_pass_three (pvw->nl, val->f, w, case_data (c, cmd->indep_var));
+       }
+    }
+  casereader_destroy (reader);
+
+
    for (v = 0; v < cmd->n_vars; ++v)
      {
        struct per_var_ws *pvw = &ws.vws[v];
@@ -476,8 +495,6 @@ run_oneway (const struct oneway_spec *cmd,
  
        pvw->sst = gsl_matrix_get (cm, 0, 0);
  
-      //      gsl_matrix_fprintf (stdout, cm, "%g ");
-
        reg_sweep (cm, 0);
  
        pvw->sse = gsl_matrix_get (cm, 0, 0);
@@ -512,11 +529,11 @@ run_oneway (const struct oneway_spec *cmd,
    for (v = 0; v < cmd->n_vars; ++v)
      {
        covariance_destroy (ws.vws[v].cov);
+      levene_destroy (ws.vws[v].nl);
        dd_destroy (ws.dd_total[v]);
      }
    free (ws.vws);
    free (ws.dd_total);
-
  }
  
  static void show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspace *ws);
@@ -846,7 +863,7 @@ show_homogeneity (const struct oneway_spec *cmd, const struct oneway_workspace *
      {
        double n;
        const struct per_var_ws *pvw = &ws->vws[v];
-      double F = pvw->levene_w;
+      double F = levene_calculate (pvw->nl);
  
        const struct variable *var = cmd->vars[v];
        const char *s = var_to_string (var);
diff --git a/src/language/stats/t-test-indep.c b/src/language/stats/t-test-indep.c

new file mode 100644 (file)

index 0000000..876b985
--- /dev/null
+++ b/src/language/stats/t-test-indep.c
@@ -0,0 +1,390 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "t-test.h"
+
+#include <gsl/gsl_cdf.h>
+#include <math.h>
+#include "libpspp/misc.h"
+
+#include "libpspp/str.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/variable.h"
+
+#include "math/moments.h"
+#include "math/levene.h"
+
+#include <output/tab.h>
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+
+struct indep_samples
+{
+  const struct variable *gvar;
+  bool cut;
+  const union value *gval0;
+  const union value *gval1;
+};
+
+struct pair_stats
+{
+  struct moments *mom[2];
+  double lev ;
+  struct levene *nl;
+};
+
+
+static void indep_summary (const struct tt *tt, struct indep_samples *is, const struct pair_stats *ps);
+static void indep_test (const struct tt *tt, const struct pair_stats *ps);
+
+static int
+which_group (const union value *v, const struct indep_samples *is)
+{
+  int width = var_get_width (is->gvar);
+  int cmp = value_compare_3way (v, is->gval0, width);
+  if ( is->cut )
+    return  (cmp < 0);
+
+  if (cmp == 0)
+    return 0;
+
+  if (0 == value_compare_3way (v, is->gval1, width))
+    return 1;
+
+  return -1;
+}
+
+void
+indep_run (struct tt *tt, const struct variable *gvar,
+          bool cut,
+          const union value *gval0, const union value *gval1,
+          struct casereader *reader)
+{
+  struct indep_samples is;
+  struct ccase *c;
+  struct casereader *r;
+
+  struct pair_stats *ps = xcalloc (sizeof (*ps), tt->n_vars);
+
+  int v;
+
+  for (v = 0; v < tt->n_vars; ++v)
+    {
+      ps[v].mom[0] = moments_create (MOMENT_VARIANCE);
+      ps[v].mom[1] = moments_create (MOMENT_VARIANCE);
+      ps[v].nl = levene_create (var_get_width (gvar), cut ? gval0: NULL);
+    }
+
+  is.gvar = gvar;
+  is.gval0 = gval0;
+  is.gval1 = gval1;
+  is.cut = cut;
+
+  r = casereader_clone (reader);
+  for ( ; (c = casereader_read (r) ); case_unref (c))
+    {
+      double w = dict_get_case_weight (tt->dict, c, NULL);
+
+      const union value *gv = case_data (c, gvar);
+      
+      int grp = which_group (gv, &is);
+      if ( grp < 0)
+       continue;
+
+      for (v = 0; v < tt->n_vars; ++v)
+       {
+         const union value *val = case_data (c, tt->vars[v]);
+         if (var_is_value_missing (tt->vars[v], val, tt->exclude))
+           continue;
+
+         moments_pass_one (ps[v].mom[grp], val->f, w);
+         levene_pass_one (ps[v].nl, val->f, w, gv);
+       }
+    }
+  casereader_destroy (r);
+
+  r = casereader_clone (reader);
+  for ( ; (c = casereader_read (r) ); case_unref (c))
+    {
+      double w = dict_get_case_weight (tt->dict, c, NULL);
+
+      const union value *gv = case_data (c, gvar);
+
+      int grp = which_group (gv, &is);
+      if ( grp < 0)
+       continue;
+
+      for (v = 0; v < tt->n_vars; ++v)
+       {
+         const union value *val = case_data (c, tt->vars[v]);
+         if (var_is_value_missing (tt->vars[v], val, tt->exclude))
+           continue;
+
+         moments_pass_two (ps[v].mom[grp], val->f, w);
+         levene_pass_two (ps[v].nl, val->f, w, gv);
+       }
+    }
+  casereader_destroy (r);
+
+  r = reader;
+  for ( ; (c = casereader_read (r) ); case_unref (c))
+    {
+      double w = dict_get_case_weight (tt->dict, c, NULL);
+
+      const union value *gv = case_data (c, gvar);
+
+      int grp = which_group (gv, &is);
+      if ( grp < 0)
+       continue;
+
+      for (v = 0; v < tt->n_vars; ++v)
+       {
+         const union value *val = case_data (c, tt->vars[v]);
+         if (var_is_value_missing (tt->vars[v], val, tt->exclude))
+           continue;
+
+         levene_pass_three (ps[v].nl, val->f, w, gv);
+       }
+    }
+  casereader_destroy (r);
+
+
+  for (v = 0; v < tt->n_vars; ++v)
+    ps[v].lev = levene_calculate (ps[v].nl);
+  
+  indep_summary (tt, &is, ps);
+  indep_test (tt, ps);
+
+
+  for (v = 0; v < tt->n_vars; ++v)
+    {
+      moments_destroy (ps[v].mom[0]);
+      moments_destroy (ps[v].mom[1]);
+      levene_destroy (ps[v].nl);
+    }
+  free (ps);
+}
+
+
+static void
+indep_summary (const struct tt *tt, struct indep_samples *is, const struct pair_stats *ps)
+{
+  const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
+
+  int v;
+  int cols = 6;
+  const int heading_rows = 1;
+  int rows = tt->n_vars * 2 + heading_rows;
+
+  struct string vallab0 ;
+  struct string vallab1 ;
+  struct tab_table *t = tab_create (cols, rows);
+
+  ds_init_empty (&vallab0);
+  ds_init_empty (&vallab1);
+
+  tab_headers (t, 0, 0, 1, 0);
+  tab_box (t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
+  tab_hline (t, TAL_2, 0, cols - 1, 1);
+
+  tab_vline (t, TAL_GAP, 1, 0, rows - 1);
+  tab_title (t, _("Group Statistics"));
+  tab_text  (t, 1, 0, TAB_CENTER | TAT_TITLE, var_to_string (is->gvar));
+  tab_text  (t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
+  tab_text  (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
+  tab_text  (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
+  tab_text  (t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
+
+  if (is->cut)
+    {
+      /*      ds_put_cstr (&vallab0, "≥"); */
+      ds_put_cstr (&vallab0, ">=");
+      ds_put_cstr (&vallab1, "<");
+
+      var_append_value_name (is->gvar, is->gval0, &vallab0);
+      var_append_value_name (is->gvar, is->gval0, &vallab1);
+    }
+  else
+    {
+      var_append_value_name (is->gvar, is->gval0, &vallab0);
+      var_append_value_name (is->gvar, is->gval1, &vallab1);
+    }
+
+  for (v = 0; v < tt->n_vars; ++v)
+    {
+      int i;
+      const struct variable *var = tt->vars[v];
+
+      tab_text (t, 0, v * 2 + heading_rows, TAB_LEFT,
+                var_to_string (var));
+
+      tab_text (t, 1, v * 2 + heading_rows, TAB_LEFT,
+                       ds_cstr (&vallab0));
+
+      tab_text (t, 1, v * 2 + 1 + heading_rows, TAB_LEFT,
+                       ds_cstr (&vallab1));
+
+      for (i = 0 ; i < 2; ++i)
+       {
+         double cc, mean, sigma;
+         moments_calculate (ps[v].mom[i], &cc, &mean, &sigma, NULL, NULL);
+      
+         tab_double (t, 2, v * 2 + i + heading_rows, TAB_RIGHT, cc, wfmt);
+         tab_double (t, 3, v * 2 + i + heading_rows, TAB_RIGHT, mean, NULL);
+         tab_double (t, 4, v * 2 + i + heading_rows, TAB_RIGHT, sqrt (sigma), NULL);
+         tab_double (t, 5, v * 2 + i + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL);
+       }
+    }
+
+  tab_submit (t);
+
+  ds_destroy (&vallab0);
+  ds_destroy (&vallab1);
+}
+
+
+static void
+indep_test (const struct tt *tt, const struct pair_stats *ps)
+{
+  int v;
+  const int heading_rows = 3;
+  const int rows= tt->n_vars * 2 + heading_rows;
+
+  const size_t cols = 11;
+
+  struct tab_table *t = tab_create (cols, rows);
+  tab_headers (t, 0, 0, 3, 0);
+  tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
+  tab_hline (t, TAL_2, 0, cols - 1, 3);
+
+  tab_title (t, _("Independent Samples Test"));
+
+  tab_hline (t, TAL_1, 2, cols - 1, 1);
+  tab_vline (t, TAL_2, 2, 0, rows - 1);
+  tab_vline (t, TAL_1, 4, 0, rows - 1);
+  tab_box (t, -1, -1, -1, TAL_1, 2, 1, cols - 2, rows - 1);
+  tab_hline (t, TAL_1, cols - 2, cols - 1, 2);
+  tab_box (t, -1, -1, -1, TAL_1, cols - 2, 2, cols - 1, rows - 1);
+  tab_joint_text (t, 2, 0, 3, 0, TAB_CENTER, _("Levene's Test for Equality of Variances"));
+  tab_joint_text (t, 4, 0, cols - 1, 0, TAB_CENTER, _("t-test for Equality of Means"));
+
+  tab_text (t, 2, 2, TAB_CENTER | TAT_TITLE, _("F"));
+  tab_text (t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig."));
+  tab_text (t, 4, 2, TAB_CENTER | TAT_TITLE, _("t"));
+  tab_text (t, 5, 2, TAB_CENTER | TAT_TITLE, _("df"));
+  tab_text (t, 6, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
+  tab_text (t, 7, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference"));
+  tab_text (t, 8, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Difference"));
+  tab_text (t, 9, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
+  tab_text (t, 10, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
+
+  tab_joint_text_format (t, 9, 1, 10, 1, TAB_CENTER,
+                         _("%g%% Confidence Interval of the Difference"),
+                         tt->confidence * 100.0);
+
+  for (v = 0; v < tt->n_vars; ++v)
+  {
+    double df, pooled_variance, mean_diff, tval;
+    double se2, std_err_diff;
+    double p, q;
+    double cc0, mean0, sigma0;
+    double cc1, mean1, sigma1;
+    moments_calculate (ps[v].mom[0], &cc0, &mean0, &sigma0, NULL, NULL);
+    moments_calculate (ps[v].mom[1], &cc1, &mean1, &sigma1, NULL, NULL);
+
+    tab_text (t, 0, v * 2 + heading_rows, TAB_LEFT, var_to_string (tt->vars[v]));
+    tab_text (t, 1, v * 2 + heading_rows, TAB_LEFT, _("Equal variances assumed"));
+
+    df = cc0 + cc1 - 2.0;
+    tab_double (t, 5, v * 2 + heading_rows, TAB_RIGHT, df, NULL);
+    
+    pooled_variance = ((cc0 - 1)* sigma0 + (cc1 - 1) * sigma1) / df ;
+
+    tval = (mean0 - mean1) / sqrt (pooled_variance);
+    tval /= sqrt ((cc0 + cc1) / (cc0 * cc1));
+
+    tab_double (t, 4, v * 2 + heading_rows, TAB_RIGHT, tval, NULL);
+
+    p = gsl_cdf_tdist_P (tval, df);
+    q = gsl_cdf_tdist_Q (tval, df);
+
+    mean_diff = mean0 - mean1;
+
+    tab_double (t, 6, v * 2 + heading_rows, TAB_RIGHT, 2.0 * (tval > 0 ? q : p),   NULL);
+    tab_double (t, 7, v * 2 + heading_rows, TAB_RIGHT, mean_diff, NULL);
+
+    std_err_diff = sqrt ((sigma0 / cc0) + (sigma1 / cc1));
+    tab_double (t, 8, v * 2 + heading_rows, TAB_RIGHT, std_err_diff, NULL);
+
+
+    /* Now work out the confidence interval */
+    q = (1 - tt->confidence)/2.0;  /* 2-tailed test */
+
+    tval = gsl_cdf_tdist_Qinv (q, df);
+    tab_double (t,  9, v * 2 + heading_rows, TAB_RIGHT, mean_diff - tval * std_err_diff, NULL);
+    tab_double (t, 10, v * 2 + heading_rows, TAB_RIGHT, mean_diff + tval * std_err_diff, NULL);
+
+    /* Equal variances not assumed */
+    tab_text (t, 1, v * 2 + heading_rows + 1,  TAB_LEFT, _("Equal variances not assumed"));
+
+    se2 = sigma0 / cc0 + sigma1 / cc1;
+    tval = mean_diff / sqrt (se2);
+    tab_double (t, 4, v * 2 + heading_rows + 1, TAB_RIGHT, tval, NULL);
+
+    {
+      double p, q;
+      const double s0 = sigma0 / (cc0);
+      const double s1 = sigma1 / (cc1);
+      double df = pow2 (s0 + s1) ;
+      df /= pow2 (s0) / (cc0 - 1) + pow2 (s1) / (cc1 - 1);
+
+      tab_double (t, 5, v * 2 + heading_rows + 1, TAB_RIGHT, df, NULL);
+
+      p = gsl_cdf_tdist_P (tval, df);
+      q = gsl_cdf_tdist_Q (tval, df);
+
+      tab_double (t, 6, v * 2 + heading_rows + 1, TAB_RIGHT, 2.0 * (tval > 0 ? q : p), NULL);
+
+      /* Now work out the confidence interval */
+      q = (1 - tt->confidence) / 2.0;  /* 2-tailed test */
+
+      tval = gsl_cdf_tdist_Qinv (q, df);
+    }
+
+    tab_double (t, 7, v * 2 + heading_rows + 1, TAB_RIGHT, mean_diff, NULL);
+    tab_double (t, 8, v * 2 + heading_rows + 1, TAB_RIGHT, std_err_diff, NULL);
+    tab_double (t, 9, v * 2 + heading_rows + 1, TAB_RIGHT,  mean_diff - tval * std_err_diff, NULL);
+    tab_double (t, 10, v * 2 + heading_rows + 1, TAB_RIGHT, mean_diff + tval * std_err_diff, NULL);
+
+    tab_double (t, 2, v * 2 + heading_rows, TAB_CENTER, ps[v].lev, NULL);
+
+
+    {
+      /* Now work out the significance of the Levene test */
+      double df1 = 1;
+      double df2 = cc0 + cc1 - 2;
+      double q = gsl_cdf_fdist_Q (ps[v].lev, df1, df2);
+      tab_double (t, 3, v * 2 + heading_rows, TAB_CENTER, q, NULL);
+    }
+  }
+
+  tab_submit (t);
+}
diff --git a/src/language/stats/t-test-one-sample.c b/src/language/stats/t-test-one-sample.c

new file mode 100644 (file)

index 0000000..38c1eff
--- /dev/null
+++ b/src/language/stats/t-test-one-sample.c
@@ -0,0 +1,253 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+
+
+#include "t-test.h"
+
+#include <math.h>
+#include <gsl/gsl_cdf.h>
+
+#include "data/variable.h"
+#include "data/format.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "libpspp/hash-functions.h"
+#include "libpspp/hmapx.h"
+#include "math/moments.h"
+
+#include "output/tab.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+
+struct per_var_stats
+{
+  const struct variable *var;
+
+  /* The position for reporting purposes */
+  int posn;
+
+  /* N, Mean, Variance */
+  struct moments *mom;
+
+  /* Sum of the differences */
+  double sum_diff;
+};
+
+
+struct one_samp
+{
+  struct hmapx hmap;
+  double testval;
+};
+
+
+static void
+one_sample_test (const struct tt *tt, const struct one_samp *os)
+{
+  struct hmapx_node *node;
+  struct per_var_stats *per_var_stats;
+
+  const int heading_rows = 3;
+  const size_t rows = heading_rows + tt->n_vars;
+  const size_t cols = 7;
+  const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
+
+  struct tab_table *t = tab_create (cols, rows);
+
+  tab_headers (t, 0, 0, heading_rows, 0);
+  tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
+  tab_hline (t, TAL_2, 0, cols - 1, 3);
+
+  tab_title (t, _("One-Sample Test"));
+  tab_hline (t, TAL_1, 1, cols - 1, 1);
+  tab_vline (t, TAL_2, 1, 0, rows - 1);
+
+  tab_joint_text_format (t, 1, 0, cols - 1, 0, TAB_CENTER,
+                         _("Test Value = %f"), os->testval);
+
+  tab_box (t, -1, -1, -1, TAL_1, 1, 1, cols - 1, rows - 1);
+
+  tab_joint_text_format (t, 5, 1, 6, 1, TAB_CENTER,
+                         _("%g%% Confidence Interval of the Difference"),
+                         tt->confidence * 100.0);
+
+  tab_vline (t, TAL_GAP, 6, 1, 1);
+  tab_hline (t, TAL_1, 5, 6, 2);
+  tab_text (t, 1, 2, TAB_CENTER | TAT_TITLE, _("t"));
+  tab_text (t, 2, 2, TAB_CENTER | TAT_TITLE, _("df"));
+  tab_text (t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
+  tab_text (t, 4, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference"));
+  tab_text (t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
+  tab_text (t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
+
+  HMAPX_FOR_EACH (per_var_stats, node, &os->hmap)
+    {
+      const struct moments *m = per_var_stats->mom;
+      double cc, mean, sigma;
+      double tval, df;
+      double p, q;
+      double mean_diff;
+      double se_mean ;
+      const int v = per_var_stats->posn;
+
+      moments_calculate (m, &cc, &mean, &sigma, NULL, NULL);
+
+      tval = (mean - os->testval) * sqrt (cc / sigma);
+
+      mean_diff = per_var_stats->sum_diff / cc;
+      se_mean = sqrt (sigma / cc);
+      df = cc - 1.0;
+      p = gsl_cdf_tdist_P (tval, df);
+      q = gsl_cdf_tdist_Q (tval, df);
+
+      tab_text (t, 0, v + heading_rows, TAB_LEFT, var_to_string (per_var_stats->var));
+      tab_double (t, 1, v + heading_rows, TAB_RIGHT, tval, NULL);
+      tab_double (t, 2, v + heading_rows, TAB_RIGHT, df, wfmt);
+
+      /* Multiply by 2 to get 2-tailed significance, makeing sure we've got
+        the correct tail*/
+      tab_double (t, 3, v + heading_rows, TAB_RIGHT, 2.0 * (tval > 0 ? q : p), NULL);
+
+      tab_double (t, 4, v + heading_rows, TAB_RIGHT, mean_diff,  NULL);
+
+      tval = gsl_cdf_tdist_Qinv ( (1.0 - tt->confidence) / 2.0, df);
+
+      tab_double (t, 5, v + heading_rows, TAB_RIGHT, mean_diff - tval * se_mean, NULL);
+      tab_double (t, 6, v + heading_rows, TAB_RIGHT, mean_diff + tval * se_mean, NULL);
+    }
+
+  tab_submit (t);
+}
+
+static void
+one_sample_summary (const struct tt *tt, const struct one_samp *os)
+{
+  struct hmapx_node *node;
+  struct per_var_stats *per_var_stats;
+
+  const int cols = 5;
+  const int heading_rows = 1;
+  const int rows = tt->n_vars + heading_rows;
+  struct tab_table *t = tab_create (cols, rows);
+  const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
+
+  tab_headers (t, 0, 0, heading_rows, 0);
+  tab_box (t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
+  tab_hline (t, TAL_2, 0, cols - 1, 1);
+
+  tab_title (t, _("One-Sample Statistics"));
+  tab_vline (t, TAL_2, 1, 0, rows - 1);
+  tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("N"));
+  tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
+  tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
+  tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
+
+  HMAPX_FOR_EACH (per_var_stats, node, &os->hmap)
+    {
+      const struct moments *m = per_var_stats->mom;
+      const int v = per_var_stats->posn;
+      double cc, mean, sigma;
+      moments_calculate (m, &cc, &mean, &sigma, NULL, NULL);
+
+      tab_text (t, 0, v + heading_rows, TAB_LEFT, var_to_string (per_var_stats->var));
+      tab_double (t, 1, v + heading_rows, TAB_RIGHT, cc, wfmt);
+      tab_double (t, 2, v + heading_rows, TAB_RIGHT, mean, NULL);
+      tab_double (t, 3, v + heading_rows, TAB_RIGHT, sqrt (sigma), NULL);
+      tab_double (t, 4, v + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL);
+    }
+
+  tab_submit (t);
+}
+
+void
+one_sample_run (const struct tt *tt, double testval, struct casereader *reader)
+{
+  int i;
+  struct ccase *c;
+  struct one_samp os;
+  struct casereader *r;
+  struct hmapx_node *node;
+  struct per_var_stats *per_var_stats;
+
+  os.testval = testval;
+  hmapx_init (&os.hmap);
+
+  /* Insert all the variables into the map */
+  for (i = 0; i < tt->n_vars; ++i)
+    {
+      struct per_var_stats *per_var_stats = xzalloc (sizeof (*per_var_stats));
+
+      per_var_stats->posn = i;
+      per_var_stats->var = tt->vars[i];
+      per_var_stats->mom = moments_create (MOMENT_VARIANCE);
+
+      hmapx_insert (&os.hmap, per_var_stats, hash_pointer (per_var_stats->var, 0));
+    }
+
+  r = casereader_clone (reader);
+  for ( ; (c = casereader_read (r) ); case_unref (c))
+    {
+      double w = dict_get_case_weight (tt->dict, c, NULL);
+      struct hmapx_node *node;
+      struct per_var_stats *per_var_stats;
+      HMAPX_FOR_EACH (per_var_stats, node, &os.hmap)
+       {
+         const struct variable *var = per_var_stats->var;
+         const union value *val = case_data (c, var);
+         if (var_is_value_missing (var, val, tt->exclude))
+           continue;
+
+         moments_pass_one (per_var_stats->mom, val->f, w);
+       }
+    }
+  casereader_destroy (r);
+
+  r = reader;
+  for ( ; (c = casereader_read (r) ); case_unref (c))
+    {
+      double w = dict_get_case_weight (tt->dict, c, NULL);
+      struct hmapx_node *node;
+      struct per_var_stats *per_var_stats;
+      HMAPX_FOR_EACH (per_var_stats, node, &os.hmap)
+       {
+         const struct variable *var = per_var_stats->var;
+         const union value *val = case_data (c, var);
+         if (var_is_value_missing (var, val, tt->exclude))
+           continue;
+
+         moments_pass_two (per_var_stats->mom, val->f, w);
+         per_var_stats->sum_diff += w * (val->f - os.testval);
+       }
+    }
+  casereader_destroy (r);
+
+  one_sample_summary (tt, &os);
+  one_sample_test (tt, &os);
+
+  HMAPX_FOR_EACH (per_var_stats, node, &os.hmap)
+    {
+      moments_destroy (per_var_stats->mom);
+      free (per_var_stats);
+    }
+
+  hmapx_destroy (&os.hmap);
+}
+
diff --git a/src/language/stats/t-test-paired.c b/src/language/stats/t-test-paired.c

new file mode 100644 (file)

index 0000000..edeaa38
--- /dev/null
+++ b/src/language/stats/t-test-paired.c
@@ -0,0 +1,356 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include <math.h>
+#include <gsl/gsl_cdf.h>
+
+#include "t-test.h"
+
+#include "math/moments.h"
+#include "math/correlation.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/variable.h"
+#include "libpspp/hmapx.h"
+#include "libpspp/hash-functions.h"
+
+#include "output/tab.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+
+struct pair_stats
+{
+  int posn;
+  double sum_of_prod;
+  struct moments *mom0;
+  const struct variable *var0;
+
+  struct moments *mom1;
+  const struct variable *var1;
+
+  struct moments *mom_diff;
+};
+
+struct paired_samp
+{
+  struct hmapx hmap;
+};
+
+static void paired_summary (const struct tt *tt, struct paired_samp *os);
+static void paired_correlations (const struct tt *tt, struct paired_samp *os);
+static void paired_test (const struct tt *tt, const struct paired_samp *os);
+
+void
+paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *reader)
+{
+  int i;
+  struct ccase *c;
+  struct paired_samp ps;
+  struct casereader *r;
+  struct hmapx_node *node;
+  struct pair_stats *pp = NULL;
+
+  hmapx_init (&ps.hmap);
+
+  for (i = 0; i < n_pairs; ++i)
+    {
+      vp *pair = &pairs[i];
+      unsigned int hash;
+      struct pair_stats *pp = xzalloc (sizeof *pp);
+      pp->posn = i;
+      pp->var0 = (*pair)[0];
+      pp->var1 = (*pair)[1];
+      pp->mom0 = moments_create (MOMENT_VARIANCE);
+      pp->mom1 = moments_create (MOMENT_VARIANCE);
+      pp->mom_diff = moments_create (MOMENT_VARIANCE);
+
+      hash = hash_pointer ((*pair)[0], 0);
+      hash = hash_pointer ((*pair)[1], hash);
+
+      hmapx_insert (&ps.hmap, pp, hash);
+    }
+
+  r = casereader_clone (reader);
+  for ( ; (c = casereader_read (r) ); case_unref (c))
+    {
+      double w = dict_get_case_weight (tt->dict, c, NULL);
+
+      struct hmapx_node *node;
+      struct pair_stats *pp = NULL;
+      HMAPX_FOR_EACH (pp, node, &ps.hmap)
+       {
+         const union value *val0 = case_data (c, pp->var0);
+         const union value *val1 = case_data (c, pp->var1);
+          if (var_is_value_missing (pp->var0, val0, tt->exclude))
+           continue;
+
+          if (var_is_value_missing (pp->var1, val1, tt->exclude))
+           continue;
+
+         moments_pass_one (pp->mom0, val0->f, w);
+         moments_pass_one (pp->mom1, val1->f, w);
+         moments_pass_one (pp->mom_diff, val0->f - val1->f, w);
+       }
+    }
+  casereader_destroy (r);
+
+  r = reader;
+  for ( ; (c = casereader_read (r) ); case_unref (c))
+    {
+      double w = dict_get_case_weight (tt->dict, c, NULL);
+
+      struct hmapx_node *node;
+      struct pair_stats *pp = NULL;
+      HMAPX_FOR_EACH (pp, node, &ps.hmap)
+       {
+         const union value *val0 = case_data (c, pp->var0);
+         const union value *val1 = case_data (c, pp->var1);
+          if (var_is_value_missing (pp->var0, val0, tt->exclude))
+           continue;
+
+          if (var_is_value_missing (pp->var1, val1, tt->exclude))
+           continue;
+
+         moments_pass_two (pp->mom0, val0->f, w);
+         moments_pass_two (pp->mom1, val1->f, w);
+         moments_pass_two (pp->mom_diff, val0->f - val1->f, w);
+         pp->sum_of_prod += val0->f * val1->f;
+       }
+    }
+  casereader_destroy (r);
+
+  paired_summary (tt, &ps);
+  paired_correlations (tt, &ps);
+  paired_test (tt, &ps);
+
+  /* Clean up */
+  HMAPX_FOR_EACH (pp, node, &ps.hmap)
+    {
+      moments_destroy (pp->mom0);
+      moments_destroy (pp->mom1);
+      moments_destroy (pp->mom_diff);
+      free (pp);
+    }
+
+  hmapx_destroy (&ps.hmap);
+}
+
+static void
+paired_summary (const struct tt *tt, struct paired_samp *os)
+{
+  size_t n_pairs = hmapx_count (&os->hmap);
+  struct hmapx_node *node;
+  struct pair_stats *pp = NULL;
+
+  const int heading_rows = 1;
+  const int heading_cols = 2;
+
+  const int cols = 4 + heading_cols;
+  const int rows = n_pairs * 2 + heading_rows;
+  struct tab_table *t = tab_create (cols, rows);
+  const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
+
+  tab_headers (t, 0, 0, heading_rows, 0);
+  tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
+  tab_box (t, -1, -1,       TAL_0, TAL_1, heading_cols, 0, cols - 1, rows - 1);
+
+  tab_hline (t, TAL_2, 0, cols - 1, 1);
+
+  tab_title (t, _("Paired Sample Statistics"));
+  tab_vline (t, TAL_2, heading_cols, 0, rows - 1);
+  tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("N"));
+  tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
+  tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
+  tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
+
+  HMAPX_FOR_EACH (pp, node, &os->hmap)
+    {
+      int v = pp->posn;
+      double cc, mean, sigma;
+
+      tab_text_format (t, 0, v * 2 + heading_rows, TAB_LEFT, _("Pair %d"), pp->posn);
+
+      /* first var */
+      moments_calculate (pp->mom0, &cc, &mean, &sigma, NULL, NULL);
+      tab_text (t, 1, v * 2 + heading_rows, TAB_LEFT, var_to_string (pp->var0));
+      tab_double (t, 3, v * 2 + heading_rows, TAB_RIGHT, cc, wfmt);
+      tab_double (t, 2, v * 2 + heading_rows, TAB_RIGHT, mean, NULL);
+      tab_double (t, 4, v * 2 + heading_rows, TAB_RIGHT, sqrt (sigma), NULL);
+      tab_double (t, 5, v * 2 + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL);
+
+      /* second var */
+      moments_calculate (pp->mom1, &cc, &mean, &sigma, NULL, NULL);
+      tab_text (t, 1, v * 2 + 1 + heading_rows, TAB_LEFT, var_to_string (pp->var1));      
+      tab_double (t, 3, v * 2 + 1 + heading_rows, TAB_RIGHT, cc, wfmt);
+      tab_double (t, 2, v * 2 + 1 + heading_rows, TAB_RIGHT, mean, NULL);
+      tab_double (t, 4, v * 2 + 1 + heading_rows, TAB_RIGHT, sqrt (sigma), NULL);
+      tab_double (t, 5, v * 2 + 1 + heading_rows, TAB_RIGHT, sqrt (sigma / cc), NULL);
+    }
+
+  tab_submit (t);
+}
+
+
+static void
+paired_correlations (const struct tt *tt, struct paired_samp *os)
+{
+  size_t n_pairs = hmapx_count (&os->hmap);
+  struct hmapx_node *node;
+  struct pair_stats *pp = NULL;
+  const int heading_rows = 1;
+  const int heading_cols = 2;
+
+  const int cols = 5;
+  const int rows = n_pairs + heading_rows;
+  struct tab_table *t = tab_create (cols, rows);
+  const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
+
+  tab_headers (t, 0, 0, heading_rows, 0);
+  tab_box (t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
+
+  tab_hline (t, TAL_2, 0, cols - 1, 1);
+
+  tab_title (t, _("Paired Samples Correlations"));
+  tab_vline (t, TAL_2, heading_cols, 0, rows - 1);
+  tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
+  tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Correlation"));
+  tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Sig."));
+
+  HMAPX_FOR_EACH (pp, node, &os->hmap)
+    {
+      double corr;
+      double cc0, mean0, sigma0;
+      double cc1, mean1, sigma1;
+      int v = pp->posn;
+
+      tab_text_format (t, 0, v + heading_rows, TAB_LEFT, _("Pair %d"), pp->posn);
+
+      tab_text_format (t, 1, v + heading_rows, TAB_LEFT, _("%s & %s"), 
+                      var_to_string (pp->var0),
+                      var_to_string (pp->var1));
+
+      moments_calculate (pp->mom0, &cc0, &mean0, &sigma0, NULL, NULL);
+      moments_calculate (pp->mom1, &cc1, &mean1, &sigma1, NULL, NULL);
+
+      /* If this fails, then we're not dealing with missing values properly */
+      assert (cc0 == cc1);
+
+      tab_double (t, 2, v + heading_rows, TAB_RIGHT, cc0, wfmt);
+
+      corr = pp->sum_of_prod / cc0  - (mean0 * mean1);
+      corr /= sqrt (sigma0 * sigma1);
+      corr *= cc0 / (cc0 - 1);
+
+      tab_double (t, 3, v + heading_rows, TAB_RIGHT, corr, NULL);
+      tab_double (t, 4, v + heading_rows, TAB_RIGHT, 
+                 2.0 * significance_of_correlation (corr, cc0), NULL);
+    }
+
+  tab_submit (t);
+}
+
+
+static void
+paired_test (const struct tt *tt, const struct paired_samp *os)
+{
+  size_t n_pairs = hmapx_count (&os->hmap);
+  struct hmapx_node *node;
+  struct pair_stats *pp = NULL;
+
+  const int heading_rows = 3;
+  const int heading_cols = 2;
+  const size_t rows = heading_rows + n_pairs;
+  const size_t cols = 10;
+  const struct fmt_spec *wfmt = tt->wv ? var_get_print_format (tt->wv) : & F_8_0;
+
+  struct tab_table *t = tab_create (cols, rows);
+
+  tab_headers (t, 0, 0, heading_rows, 0);
+  tab_box (t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
+  tab_hline (t, TAL_2, 0, cols - 1, 3);
+
+  tab_title (t, _("Paired Samples Test"));
+  tab_hline (t, TAL_1, heading_cols, 6, 1);
+  tab_vline (t, TAL_2, heading_cols, 0, rows - 1);
+
+  tab_box (t, -1, -1, -1, TAL_1, heading_cols, 0, cols - 1, rows - 1);
+
+  tab_joint_text (t, 2, 0, 6, 0, TAB_CENTER,
+                 _("Paired Differences"));
+
+  tab_joint_text_format (t, 5, 1, 6, 1, TAB_CENTER,
+                         _("%g%% Confidence Interval of the Difference"),
+                         tt->confidence * 100.0);
+
+  tab_vline (t, TAL_GAP, 6, 1, 1);
+  tab_hline (t, TAL_1, 5, 6, 2);
+  tab_text (t, 7, 2, TAB_CENTER | TAT_TITLE, _("t"));
+  tab_text (t, 8, 2, TAB_CENTER | TAT_TITLE, _("df"));
+  tab_text (t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
+  tab_text (t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean"));
+  tab_text (t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
+  tab_text (t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean"));
+
+  tab_text (t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
+  tab_text (t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
+
+  HMAPX_FOR_EACH (pp, node, &os->hmap)
+    {
+      int v = pp->posn;
+      double cc, mean, sigma;
+      double df ;
+      double tval;
+      double p, q;
+      double se_mean;
+
+      moments_calculate (pp->mom_diff, &cc, &mean, &sigma, NULL, NULL);
+
+      df = cc - 1.0;
+      tab_text_format (t, 0, v + heading_rows, TAB_LEFT, _("Pair %d"), v);
+
+      tab_text_format (t, 1, v + heading_rows, TAB_LEFT, _("%s - %s"), 
+                      var_to_string (pp->var0),
+                      var_to_string (pp->var1));
+
+      tval = mean * sqrt (cc / sigma);
+      se_mean = sqrt (sigma / cc);
+
+      tab_double (t, 2, v + heading_rows, TAB_RIGHT, mean, NULL);
+      tab_double (t, 3, v + heading_rows, TAB_RIGHT, sqrt (sigma), NULL);
+      tab_double (t, 4, v + heading_rows, TAB_RIGHT, se_mean, NULL);
+
+      tab_double (t, 7, v + heading_rows, TAB_RIGHT, tval, NULL);
+      tab_double (t, 8, v + heading_rows, TAB_RIGHT, df, wfmt);
+
+
+      p = gsl_cdf_tdist_P (tval, df);
+      q = gsl_cdf_tdist_Q (tval, df);
+
+      tab_double (t, 9, v + heading_rows, TAB_RIGHT, 2.0 * (tval > 0 ? q : p), NULL);
+
+      tval = gsl_cdf_tdist_Qinv ( (1.0 - tt->confidence) / 2.0, df);
+
+      tab_double (t, 5, v + heading_rows, TAB_RIGHT, mean - tval * se_mean, NULL);
+      tab_double (t, 6, v + heading_rows, TAB_RIGHT, mean + tval * se_mean, NULL);
+    }
+
+  tab_submit (t);
+}
diff --git a/src/language/stats/t-test-parser.c b/src/language/stats/t-test-parser.c

new file mode 100644 (file)

index 0000000..51f30b9
--- /dev/null
+++ b/src/language/stats/t-test-parser.c
@@ -0,0 +1,366 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "libpspp/message.h"
+
+#include "data/casegrouper.h"
+#include "data/casereader.h"
+#include "data/dictionary.h"
+#include "data/dataset.h"
+#include "data/missing-values.h"
+
+#include "language/lexer/lexer.h"
+#include "language/command.h"
+#include "language/lexer/variable-parser.h"
+#include "language/lexer/value-parser.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+#include "t-test.h"
+
+int
+cmd_t_test (struct lexer *lexer, struct dataset *ds)
+{
+  bool ok;
+  const struct dictionary *dict = dataset_dict (ds);
+  struct tt tt;
+  int mode_count = 0;
+
+  /* Variables pertaining to the paired mode */
+  const struct variable **v1 = NULL;
+  size_t n_v1;
+  const struct variable **v2 = NULL;
+  size_t n_v2;
+         
+  size_t n_pairs;
+  vp *pairs = NULL;
+
+
+  /* One sample mode */
+  double testval;
+
+  /* Independent samples mode */
+  const struct variable *gvar;
+  union value gval0;
+  union value gval1;
+  bool cut;
+
+  tt.wv = dict_get_weight (dict);
+  tt.dict = dict;
+  tt.confidence = 0.95;
+  tt.exclude = MV_ANY;
+  tt.missing_type = MISS_ANALYSIS;
+  tt.n_vars = 0;
+  tt.vars = NULL;
+
+  lex_match (lexer, T_EQUALS);
+
+  for (; lex_token (lexer) != T_ENDCMD; )
+    {
+      lex_match (lexer, T_SLASH);
+      if (lex_match_id (lexer, "TESTVAL"))
+       {
+         mode_count++;
+         tt.mode = MODE_SINGLE;
+         lex_match (lexer, T_EQUALS);
+         lex_force_num (lexer);
+         testval = lex_number (lexer);
+         lex_get (lexer);
+       }
+      else if (lex_match_id (lexer, "GROUPS"))
+       {
+         mode_count++;
+         cut = false;
+         tt.mode = MODE_INDEP;
+         lex_match (lexer, T_EQUALS);
+
+         if (NULL == (gvar = parse_variable (lexer, dict)))
+           goto parse_failed;
+      
+         if (lex_match (lexer, T_LPAREN))
+           {
+
+             value_init (&gval0, var_get_width (gvar));
+             parse_value (lexer, &gval0, gvar);
+             cut = true;
+             if (lex_match (lexer, T_COMMA))
+               {
+                 value_init (&gval1, var_get_width (gvar));
+                 parse_value (lexer, &gval1, gvar);
+                 cut = false;
+               }
+
+             lex_force_match (lexer, T_RPAREN);
+           }
+         else
+           {
+             value_init (&gval0, 0);
+             value_init (&gval1, 0);
+             gval0.f = 1.0;
+             gval1.f = 2.0;
+             cut = false;
+           }
+
+         if ( cut == true && var_is_alpha (gvar))
+           {
+             msg (SE, _("When applying GROUPS to a string variable, two "
+                        "values must be specified."));
+             goto parse_failed;
+           }
+       }
+      else if (lex_match_id (lexer, "PAIRS"))
+       {
+         bool with = false;
+         bool paired = false;
+         mode_count++;
+         tt.mode = MODE_PAIRED;
+         lex_match (lexer, T_EQUALS);
+
+         if (!parse_variables_const (lexer, dict,
+                                     &v1, &n_v1,
+                                     PV_NO_DUPLICATE | PV_NUMERIC))
+           goto parse_failed;
+
+         if ( lex_match (lexer, T_WITH))
+           {
+             with = true;
+             if (!parse_variables_const (lexer, dict,
+                                         &v2, &n_v2,
+                                         PV_NO_DUPLICATE | PV_NUMERIC))
+               goto parse_failed;
+
+             if (lex_match (lexer, T_LPAREN)
+                 && lex_match_id (lexer, "PAIRED")
+                 && lex_match (lexer, T_RPAREN))
+               {
+                 paired = true;
+                 if (n_v1 != n_v2)
+                   {
+                     msg (SE, _("PAIRED was specified but the number of variables "
+                                "preceding WITH (%zu) did not match the number "
+                                "following (%zu)."),
+                          n_v1, n_v2);
+                     goto parse_failed;
+                   }
+               }
+           }
+         {
+           int i;
+
+           if ( !with )
+             n_pairs = (n_v1 * (n_v1 - 1)) / 2.0;
+           else if ( paired )
+             n_pairs = n_v1;
+           else
+             n_pairs = n_v1 * n_v2;
+         
+           pairs = xcalloc (sizeof *pairs, n_pairs);
+
+
+           if ( with)
+             {
+               int x = 0;
+               if (paired)
+                 {
+                   for (i = 0 ; i < n_v1; ++i)
+                     {
+                       vp *pair = &pairs[i];
+                       (*pair)[0] = v1[i];
+                       (*pair)[1] = v2[i];
+                     } 
+                 }
+               else
+                 {
+                   for (i = 0 ; i < n_v1; ++i)
+                     {
+                       int j;
+                       for (j = 0 ; j < n_v2; ++j)
+                         {
+                           vp *pair = &pairs[x++];
+                           (*pair)[0] = v1[i];
+                           (*pair)[1] = v2[j];
+                         }
+                     }
+                 }
+             }
+           else
+             {
+               int x = 0;
+               for (i = 0 ; i < n_v1; ++i)
+                 {
+                   int j;
+
+                   for (j = i + 1 ; j < n_v1; ++j)
+                     {
+                       vp *pair = &pairs[x++];
+                       (*pair)[0] = v1[i];
+                       (*pair)[1] = v1[j];
+                     }
+                 }
+             }
+
+         }
+       }
+      else if (lex_match_id (lexer, "VARIABLES"))
+       {
+         if ( tt.mode == MODE_PAIRED)
+           {
+             msg (SE, _("VARIABLES subcommand may not be used with PAIRS."));
+             goto parse_failed;
+           }
+
+         lex_match (lexer, T_EQUALS);
+
+         if (!parse_variables_const (lexer, dict,
+                                     &tt.vars,
+                                     &tt.n_vars,
+                                     PV_NO_DUPLICATE | PV_NUMERIC))
+           goto parse_failed;
+       }
+      else if ( lex_match_id (lexer, "MISSING"))
+       {
+          lex_match (lexer, T_EQUALS);
+          while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
+            {
+             if (lex_match_id (lexer, "INCLUDE"))
+               {
+                 tt.exclude = MV_SYSTEM;
+               }
+             else if (lex_match_id (lexer, "EXCLUDE"))
+               {
+                 tt.exclude = MV_ANY;
+               }
+             else if (lex_match_id (lexer, "LISTWISE"))
+               {
+                 tt.missing_type = MISS_LISTWISE;
+               }
+             else if (lex_match_id (lexer, "ANALYSIS"))
+               {
+                 tt.missing_type = MISS_ANALYSIS;
+               }
+             else
+               {
+                  lex_error (lexer, NULL);
+                 goto parse_failed;
+               }
+             lex_match (lexer, T_COMMA);
+           }
+       }
+      else if (lex_match_id (lexer, "CRITERIA"))
+       {
+          lex_match (lexer, T_EQUALS);
+         if ( lex_force_match_id (lexer, "CIN"))
+           if ( lex_force_match (lexer, T_LPAREN))
+             {
+               lex_force_num (lexer);
+               tt.confidence = lex_number (lexer);
+               lex_get (lexer);
+               lex_force_match (lexer, T_RPAREN);
+             }
+       }
+      else 
+       {
+         lex_error (lexer, NULL);
+         goto parse_failed;
+       }
+    }
+
+  if ( mode_count != 1)
+    {
+      msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands "
+                "must be specified."));
+      goto parse_failed;
+    }
+
+  if (tt.n_vars == 0 && tt.mode != MODE_PAIRED)
+    {
+      msg (SE, _("One or more VARIABLES must be specified."));
+      goto parse_failed;
+    }
+
+
+
+  /* Deal with splits etc */
+  {
+    struct casereader *group;
+    struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), dict);
+
+    while (casegrouper_get_next_group (grouper, &group))
+      {
+       if ( tt.mode == MODE_SINGLE)
+         {
+           if ( tt.missing_type == MISS_LISTWISE )
+             group  = casereader_create_filter_missing (group,
+                                                        tt.vars, tt.n_vars,
+                                                        tt.exclude,
+                                                        NULL,  NULL);
+           one_sample_run (&tt, testval, group);
+         }
+       else if ( tt.mode == MODE_PAIRED)
+         {
+           if ( tt.missing_type == MISS_LISTWISE )
+             {
+               group  = casereader_create_filter_missing (group,
+                                                          v1, n_v1,
+                                                          tt.exclude,
+                                                          NULL,  NULL);
+               group  = casereader_create_filter_missing (group,
+                                                          v2, n_v2,
+                                                          tt.exclude,
+                                                          NULL,  NULL);
+             }
+
+           paired_run (&tt, n_pairs, pairs, group);
+         }
+       else /* tt.mode == MODE_INDEP */
+         {
+           if ( tt.missing_type == MISS_LISTWISE )
+             {
+               group  = casereader_create_filter_missing (group,
+                                                          tt.vars, tt.n_vars,
+                                                          tt.exclude,
+                                                          NULL,  NULL);
+
+               group  = casereader_create_filter_missing (group,
+                                                          &gvar, 1,
+                                                          tt.exclude,
+                                                          NULL,  NULL);
+
+             }
+
+           indep_run (&tt, gvar, cut, &gval0, &gval1, group);
+         }
+      }
+
+    ok = casegrouper_destroy (grouper);
+    ok = proc_commit (ds) && ok;
+  }
+
+  free (pairs);
+  free (v1);
+  free (v2);
+
+  free (tt.vars);
+
+  return ok ? CMD_SUCCESS : CMD_FAILURE;
+
+ parse_failed:
+  return CMD_FAILURE;
+}
+
diff --git a/src/language/stats/t-test.h b/src/language/stats/t-test.h

new file mode 100644 (file)

index 0000000..2b4dba4
--- /dev/null
+++ b/src/language/stats/t-test.h
@@ -0,0 +1,61 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef T_TEST_H
+#define T_TEST_H 1
+
+#include "data/missing-values.h"
+
+struct variable;
+typedef const struct variable *vp[2];
+
+enum missing_type
+  {
+    MISS_LISTWISE,
+    MISS_ANALYSIS,
+  };
+
+enum mode
+  {
+    MODE_PAIRED,
+    MODE_INDEP,
+    MODE_SINGLE,
+  };
+
+struct tt
+{
+  size_t n_vars;
+  const struct variable **vars;
+  enum mode mode;
+  enum missing_type missing_type;
+  enum mv_class exclude;
+  double confidence;
+  const struct variable *wv;
+  const struct dictionary *dict;
+};
+
+struct casereader;
+union value;
+
+void one_sample_run (const struct tt *tt, double testval, struct casereader *reader);
+void paired_run (const struct tt *tt, size_t n_pairs, vp *pairs, struct casereader *reader);
+void indep_run (struct tt *tt, const struct variable *gvar,
+               bool cut,
+               const union value *gval0, const union value *gval1,
+               struct casereader *reader);
+
+
+#endif
diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q

deleted file mode 100644 (file)

index d26fc8a..0000000
--- a/src/language/stats/t-test.q
+++ /dev/null
@@ -1,1560 +0,0 @@
-/* PSPP - a program for statistical analysis.
-   Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc.
-
-   This program is free software: you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation, either version 3 of the License, or
-   (at your option) any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-#include <gsl/gsl_cdf.h>
-#include <math.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "data/case.h"
-#include "data/casegrouper.h"
-#include "data/casereader.h"
-#include "data/dataset.h"
-#include "data/dictionary.h"
-#include "data/format.h"
-#include "data/value-labels.h"
-#include "data/variable.h"
-#include "language/command.h"
-#include "language/dictionary/split-file.h"
-#include "language/lexer/lexer.h"
-#include "language/lexer/value-parser.h"
-#include "libpspp/array.h"
-#include "libpspp/assertion.h"
-#include "libpspp/compiler.h"
-#include "libpspp/hash.h"
-#include "libpspp/message.h"
-#include "libpspp/misc.h"
-#include "libpspp/str.h"
-#include "libpspp/taint.h"
-#include "math/correlation.h"
-#include "math/group-proc.h"
-#include "math/levene.h"
-#include "output/tab.h"
-
-#include "gl/minmax.h"
-#include "gl/xalloc.h"
-#include "gl/xmemdup0.h"
-
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
-
-/* (headers) */
-
-/* (specification)
-   "T-TEST" (tts_):
-   +groups=custom;
-   testval=double;
-   +variables=varlist("PV_NO_SCRATCH | PV_NUMERIC");
-   +pairs=custom;
-   missing=miss:!analysis/listwise,
-   incl:include/!exclude;
-   +format=fmt:!labels/nolabels;
-   criteria=:cin(d:criteria,"%s > 0. && %s < 1.").
-*/
-/* (declarations) */
-/* (functions) */
-
-enum comparison
-  {
-    CMP_LE,
-    CMP_EQ,
-  };
-
-/* A pair of variables to be compared. */
-struct pair
-  {
-    const struct variable *v[2]; /* The paired variables. */
-    double n;             /* The number of valid variable pairs */
-    double sum[2];        /* The sum of the members */
-    double ssq[2];        /* sum of squares of the members */
-    double std_dev[2];    /* Std deviation of the members */
-    double s_std_dev[2];  /* Sample Std deviation of the members */
-    double mean[2];       /* The means of the members */
-    double correlation;   /* Correlation coefficient between the variables. */
-    double sum_of_diffs;  /* The sum of the differences */
-    double sum_of_prod;   /* The sum of the products */
-    double mean_diff;     /* The mean of the differences */
-    double ssq_diffs;     /* The sum of the squares of the differences */
-    double std_dev_diff;  /* The std deviation of the differences */
-  };
-
-/* Which mode was T-TEST invoked */
-enum t_test_mode {
-  T_1_SAMPLE,                   /* One-sample tests. */
-  T_IND_SAMPLES,                /* Independent-sample tests. */
-  T_PAIRED                      /* Paired-sample tests. */
-};
-
-/* Total state of a T-TEST procedure. */
-struct t_test_proc
-  {
-    enum t_test_mode mode;      /* Mode that T-TEST was invoked in. */
-    double criteria;            /* Confidence interval in (0, 1). */
-    enum mv_class exclude;      /* Classes of missing values to exclude. */
-    bool listwise_missing;      /* Drop whole case if one missing var? */
-    struct fmt_spec weight_format; /* Format of weight variable. */
-
-    /* Dependent variables. */
-    const struct variable **vars;
-    size_t n_vars;
-
-    /* For mode == T_1_SAMPLE. */
-    double testval;
-
-    /* For mode == T_PAIRED only. */
-    struct pair *pairs;
-    size_t n_pairs;
-
-    /* For mode == T_IND_SAMPLES only. */
-    struct variable *indep_var; /* Independent variable. */
-    enum comparison criterion;  /* Type of comparison. */
-    double critical_value;      /* CMP_LE only: Grouping threshold value. */
-    union value g_value[2];     /* CMP_EQ only: Per-group indep var values. */
-  };
-
-/* Statistics Summary Box */
-struct ssbox
-  {
-    struct tab_table *t;
-    void (*populate) (struct ssbox *, struct t_test_proc *);
-    void (*finalize) (struct ssbox *);
-  };
-
-static void ssbox_create (struct ssbox *, struct t_test_proc *);
-static void ssbox_populate (struct ssbox *, struct t_test_proc *);
-static void ssbox_finalize (struct ssbox *);
-
-/* Paired Samples Correlation box */
-static void pscbox (struct t_test_proc *);
-
-
-/* Test Results Box. */
-struct trbox {
-  struct tab_table *t;
-  void (*populate) (struct trbox *, struct t_test_proc *);
-  void (*finalize) (struct trbox *);
-  };
-
-static void trbox_create (struct trbox *, struct t_test_proc *);
-static void trbox_populate (struct trbox *, struct t_test_proc *);
-static void trbox_finalize (struct trbox *);
-
-static void calculate (struct t_test_proc *, struct casereader *,
-                       const struct dataset *);
-
-static int compare_group_binary (const struct group_statistics *a,
-                                 const struct group_statistics *b,
-                                 const struct t_test_proc *);
-static unsigned hash_group_binary (const struct group_statistics *g,
-                                  const struct t_test_proc *p);
-
-static void t_test_proc_destroy (struct t_test_proc *proc);
-
-int
-cmd_t_test (struct lexer *lexer, struct dataset *ds)
-{
-  struct cmd_t_test cmd;
-  struct t_test_proc proc;
-  struct casegrouper *grouper;
-  struct casereader *group;
-  struct variable *wv;
-  bool ok = false;
-
-  proc.pairs = NULL;
-  proc.n_pairs = 0;
-  proc.vars = NULL;
-  proc.indep_var = NULL;
-  if (!parse_t_test (lexer, ds, &cmd, &proc))
-    goto parse_failed;
-
-  wv = dict_get_weight (dataset_dict (ds));
-  proc.weight_format = wv ? *var_get_print_format (wv) : F_8_0;
-
-  if ((cmd.sbc_testval != 0) + (cmd.sbc_groups != 0) + (cmd.sbc_pairs != 0)
-      != 1)
-    {
-      msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands "
-                 "must be specified."));
-      goto error;
-    }
-
-  proc.mode = (cmd.sbc_testval ? T_1_SAMPLE
-               : cmd.sbc_groups ? T_IND_SAMPLES
-               : T_PAIRED);
-  proc.criteria = cmd.sbc_criteria ? cmd.criteria : 0.95;
-  proc.exclude = cmd.incl != TTS_INCLUDE ? MV_ANY : MV_SYSTEM;
-  proc.listwise_missing = cmd.miss == TTS_LISTWISE;
-
-  if (proc.mode == T_1_SAMPLE)
-    proc.testval = cmd.n_testval[0];
-
-  if (proc.mode == T_PAIRED)
-    {
-      size_t i, j;
-
-      if (cmd.sbc_variables)
-       {
-         msg (SE, _("VARIABLES subcommand may not be used with PAIRS."));
-          goto error;
-       }
-
-      /* Fill proc.vars with the unique variables from pairs. */
-      proc.n_vars = proc.n_pairs * 2;
-      proc.vars = xmalloc (sizeof *proc.vars * proc.n_vars);
-      for (i = j = 0; i < proc.n_pairs; i++)
-        {
-          proc.vars[j++] = proc.pairs[i].v[0];
-          proc.vars[j++] = proc.pairs[i].v[1];
-        }
-      proc.n_vars = sort_unique (proc.vars, proc.n_vars, sizeof *proc.vars,
-                                 compare_var_ptrs_by_name, NULL);
-    }
-  else
-    {
-      if (!cmd.n_variables)
-        {
-          msg (SE, _("One or more VARIABLES must be specified."));
-          goto error;
-        }
-      proc.n_vars = cmd.n_variables;
-      proc.vars = cmd.v_variables;
-      cmd.v_variables = NULL;
-    }
-
-  /* Data pass. */
-  grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
-  while (casegrouper_get_next_group (grouper, &group))
-    calculate (&proc, group, ds);
-  ok = casegrouper_destroy (grouper);
-
-  /* Free 'proc' then commit the procedure.  Must happen in this order because
-     if proc->indep_var was created by a temporary transformation then
-     committing will destroy it.  */
-  t_test_proc_destroy (&proc);
-  ok = proc_commit (ds) && ok;
-
-  return ok ? CMD_SUCCESS : CMD_FAILURE;
-
-error:
-  free_t_test (&cmd);
-parse_failed:
-  t_test_proc_destroy (&proc);
-  return CMD_FAILURE;
-}
-
-static void
-t_test_proc_destroy (struct t_test_proc *proc)
-{
-  if (proc->indep_var != NULL)
-    {
-      int width = var_get_width (proc->indep_var);
-      value_destroy (&proc->g_value[0], width);
-      value_destroy (&proc->g_value[1], width);
-    }
-  free (proc->vars);
-  free (proc->pairs);
-}
-
-static int
-tts_custom_groups (struct lexer *lexer, struct dataset *ds,
-                   struct cmd_t_test *cmd UNUSED, void *proc_)
-{
-  struct t_test_proc *proc = proc_;
-  int n_values;
-  int width;
-
-  lex_match (lexer, T_EQUALS);
-
-  proc->indep_var = parse_variable (lexer, dataset_dict (ds));
-  if (proc->indep_var == NULL)
-    {
-      lex_error (lexer, "expecting variable name in GROUPS subcommand");
-      return 0;
-    }
-  width = var_get_width (proc->indep_var);
-  value_init (&proc->g_value[0], width);
-  value_init (&proc->g_value[1], width);
-
-  if (!lex_match (lexer, T_LPAREN))
-    n_values = 0;
-  else
-    {
-      if (!parse_value (lexer, &proc->g_value[0], proc->indep_var))
-        return 0;
-      lex_match (lexer, T_COMMA);
-      if (lex_match (lexer, T_RPAREN))
-        n_values = 1;
-      else
-        {
-          if (!parse_value (lexer, &proc->g_value[1], proc->indep_var)
-              || !lex_force_match (lexer, T_RPAREN))
-            return 0;
-          n_values = 2;
-        }
-    }
-
-  if (var_is_numeric (proc->indep_var))
-    {
-      proc->criterion = n_values == 1 ? CMP_LE : CMP_EQ;
-      if (n_values == 1)
-        proc->critical_value = proc->g_value[0].f;
-      else if (n_values == 0)
-       {
-         proc->g_value[0].f = 1;
-         proc->g_value[1].f = 2;
-       }
-    }
-  else
-    {
-      proc->criterion = CMP_EQ;
-      if (n_values != 2)
-       {
-         msg (SE, _("When applying GROUPS to a string variable, two "
-                     "values must be specified."));
-         return 0;
-       }
-    }
-  return 1;
-}
-
-static void
-add_pair (struct t_test_proc *proc,
-          const struct variable *v0, const struct variable *v1)
-{
-  struct pair *p = &proc->pairs[proc->n_pairs++];
-  p->v[0] = v0;
-  p->v[1] = v1;
-}
-
-static int
-tts_custom_pairs (struct lexer *lexer, struct dataset *ds,
-                  struct cmd_t_test *cmd UNUSED, void *proc_)
-{
-  struct t_test_proc *proc = proc_;
-
-  const struct variable **vars1 = NULL;
-  size_t n_vars1 = 0;
-
-  const struct variable **vars2 = NULL;
-  size_t n_vars2 = 0;
-
-  bool paired = false;
-
-  size_t n_total_pairs;
-  size_t i, j;
-
-  lex_match (lexer, T_EQUALS);
-
-  if (!parse_variables_const (lexer, dataset_dict (ds), &vars1, &n_vars1,
-                              PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
-    return 0;
-
-  if (lex_match (lexer, T_WITH))
-    {
-      if (!parse_variables_const (lexer, dataset_dict (ds), &vars2, &n_vars2,
-                                  PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
-        {
-          free (vars1);
-          return 0;
-        }
-
-      if (lex_match (lexer, T_LPAREN)
-          && lex_match_id (lexer, "PAIRED")
-          && lex_match (lexer, T_RPAREN))
-        {
-          paired = true;
-          if (n_vars1 != n_vars2)
-            {
-              msg (SE, _("PAIRED was specified but the number of variables "
-                         "preceding WITH (%zu) did not match the number "
-                         "following (%zu)."),
-                   n_vars1, n_vars2);
-              free (vars1);
-              free (vars2);
-              return 0;
-            }
-        }
-    }
-  else
-    {
-      if (n_vars1 < 2)
-       {
-         free (vars1);
-         msg (SE, _("At least two variables must be specified on PAIRS."));
-         return 0;
-       }
-    }
-
-  /* Allocate storage for the new pairs. */
-  n_total_pairs = proc->n_pairs + (paired ? n_vars1
-                                   : n_vars2 > 0 ? n_vars1 * n_vars2
-                                   : n_vars1 * (n_vars1 - 1) / 2);
-  proc->pairs = xnrealloc (proc->pairs, n_total_pairs, sizeof *proc->pairs);
-
-  /* Populate the pairs with the appropriate variables. */
-  if (paired)
-    for (i = 0; i < n_vars1; i++)
-      add_pair (proc, vars1[i], vars2[i]);
-  else if (n_vars2 > 0)
-    for (i = 0; i < n_vars1; i++)
-      for (j = 0; j < n_vars2; j++)
-        add_pair (proc, vars1[i], vars2[j]);
-  else
-    for (i = 0; i < n_vars1; i++)
-      for (j = i + 1; j < n_vars1; j++)
-        add_pair (proc, vars1[i], vars1[j]);
-  assert (proc->n_pairs == n_total_pairs);
-
-  free (vars1);
-  free (vars2);
-  return 1;
-}
-\f
-/* Implementation of the SSBOX object. */
-
-static void ssbox_base_init (struct ssbox *, int cols, int rows);
-static void ssbox_base_finalize (struct ssbox *);
-static void ssbox_one_sample_init (struct ssbox *, struct t_test_proc *);
-static void ssbox_independent_samples_init (struct ssbox *, struct t_test_proc *);
-static void ssbox_paired_init (struct ssbox *, struct t_test_proc *);
-
-/* Factory to create an ssbox. */
-static void
-ssbox_create (struct ssbox *ssb, struct t_test_proc *proc)
-{
-  switch (proc->mode)
-    {
-    case T_1_SAMPLE:
-      ssbox_one_sample_init (ssb, proc);
-      break;
-    case T_IND_SAMPLES:
-      ssbox_independent_samples_init (ssb, proc);
-      break;
-    case T_PAIRED:
-      ssbox_paired_init (ssb, proc);
-      break;
-    default:
-      NOT_REACHED ();
-    }
-}
-
-/* Despatcher for the populate method */
-static void
-ssbox_populate (struct ssbox *ssb, struct t_test_proc *proc)
-{
-  ssb->populate (ssb, proc);
-}
-
-/* Despatcher for finalize */
-static void
-ssbox_finalize (struct ssbox *ssb)
-{
-  ssb->finalize (ssb);
-}
-
-/* Submit the box and clear up */
-static void
-ssbox_base_finalize (struct ssbox *ssb)
-{
-  tab_submit (ssb->t);
-}
-
-/* Initialize a ssbox struct */
-static void
-ssbox_base_init (struct ssbox *this, int cols, int rows)
-{
-  this->finalize = ssbox_base_finalize;
-  this->t = tab_create (cols, rows);
-
-  tab_headers (this->t, 0, 0, 1, 0);
-  tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
-  tab_hline (this->t, TAL_2, 0, cols- 1, 1);
-}
-\f
-/* ssbox implementations. */
-
-static void ssbox_one_sample_populate (struct ssbox *, struct t_test_proc *);
-static void ssbox_independent_samples_populate (struct ssbox *,
-                                                struct t_test_proc *);
-static void ssbox_paired_populate (struct ssbox *, struct t_test_proc *);
-
-/* Initialize the one_sample ssbox */
-static void
-ssbox_one_sample_init (struct ssbox *this, struct t_test_proc *proc)
-{
-  const int hsize = 5;
-  const int vsize = proc->n_vars + 1;
-
-  this->populate = ssbox_one_sample_populate;
-
-  ssbox_base_init (this, hsize, vsize);
-  tab_title (this->t, _("One-Sample Statistics"));
-  tab_vline (this->t, TAL_2, 1, 0, vsize - 1);
-  tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N"));
-  tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
-  tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
-  tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
-}
-
-/* Initialize the independent samples ssbox */
-static void
-ssbox_independent_samples_init (struct ssbox *this, struct t_test_proc *proc)
-{
-  int hsize=6;
-  int vsize = proc->n_vars * 2 + 1;
-
-  this->populate = ssbox_independent_samples_populate;
-
-  ssbox_base_init (this, hsize, vsize);
-  tab_vline (this->t, TAL_GAP, 1, 0, vsize - 1);
-  tab_title (this->t, _("Group Statistics"));
-  tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE,
-            var_get_name (proc->indep_var));
-  tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
-  tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
-  tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
-  tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
-}
-
-/* Populate the ssbox for independent samples */
-static void
-ssbox_independent_samples_populate (struct ssbox *ssb,
-                                    struct t_test_proc *proc)
-{
-  int i;
-
-  char *val_lab[2];
-  double indep_value[2];
-
-  char prefix[2][3];
-
-  for (i = 0; i < 2; i++)
-    {
-      union value *value = &proc->g_value[i];
-      int width = var_get_width (proc->indep_var);
-
-      indep_value[i] = (proc->criterion == CMP_LE ? proc->critical_value
-                        : value->f);
-
-      if (val_type_from_width (width) == VAL_NUMERIC)
-        {
-          const char *s = var_lookup_value_label (proc->indep_var, value);
-          val_lab[i] = s ? xstrdup (s) : xasprintf ("%g", indep_value[i]);
-        }
-      else
-        val_lab[i] = xmemdup0 (value_str (value, width), width);
-    }
-
-  if (proc->criterion == CMP_LE)
-    {
-      strcpy (prefix[0], ">=");
-      strcpy (prefix[1], "<");
-    }
-  else
-    {
-      strcpy (prefix[0], "");
-      strcpy (prefix[1], "");
-    }
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      const struct variable *var = proc->vars[i];
-      struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
-      int count=0;
-
-      tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT,
-                var_get_name (proc->vars[i]));
-      tab_text_format (ssb->t, 1, i * 2 + 1, TAB_LEFT,
-                       "%s%s", prefix[0], val_lab[0]);
-      tab_text_format (ssb->t, 1, i * 2 + 1+ 1, TAB_LEFT,
-                       "%s%s", prefix[1], val_lab[1]);
-
-      /* Fill in the group statistics */
-      for (count = 0; count < 2; count++)
-       {
-         union value search_val;
-         struct group_statistics *gs;
-
-         if (proc->criterion == CMP_LE)
-            search_val.f = proc->critical_value + (count == 0 ? 1.0 : -1.0);
-         else
-            search_val = proc->g_value[count];
-
-         gs = hsh_find (grp_hash, &search_val);
-         assert (gs);
-
-         tab_double (ssb->t, 2, i * 2 + count+ 1, TAB_RIGHT, gs->n,
-                      &proc->weight_format);
-         tab_double (ssb->t, 3, i * 2 + count+ 1, TAB_RIGHT, gs->mean, NULL);
-         tab_double (ssb->t, 4, i * 2 + count+ 1, TAB_RIGHT, gs->std_dev,
-                      NULL);
-         tab_double (ssb->t, 5, i * 2 + count+ 1, TAB_RIGHT, gs->se_mean,
-                      NULL);
-       }
-    }
-  free (val_lab[0]);
-  free (val_lab[1]);
-}
-
-/* Initialize the paired values ssbox */
-static void
-ssbox_paired_init (struct ssbox *this, struct t_test_proc *proc)
-{
-  int hsize = 6;
-  int vsize = proc->n_pairs * 2 + 1;
-
-  this->populate = ssbox_paired_populate;
-
-  ssbox_base_init (this, hsize, vsize);
-  tab_title (this->t, _("Paired Sample Statistics"));
-  tab_vline (this->t, TAL_GAP, 1, 0, vsize - 1);
-  tab_vline (this->t, TAL_2, 2, 0, vsize - 1);
-  tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
-  tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N"));
-  tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
-  tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
-}
-
-/* Populate the ssbox for paired values */
-static void
-ssbox_paired_populate (struct ssbox *ssb, struct t_test_proc *proc)
-{
-  int i;
-
-  for (i = 0; i < proc->n_pairs; i++)
-    {
-      struct pair *p = &proc->pairs[i];
-      int j;
-
-      tab_text_format (ssb->t, 0, i * 2 + 1, TAB_LEFT, _("Pair %d"), i);
-      for (j=0; j < 2; j++)
-       {
-         /* Titles */
-         tab_text (ssb->t, 1, i * 2 + j + 1, TAB_LEFT,
-                    var_get_name (p->v[j]));
-
-         /* Values */
-         tab_double (ssb->t, 2, i * 2 + j + 1, TAB_RIGHT, p->mean[j], NULL);
-         tab_double (ssb->t, 3, i * 2 + j + 1, TAB_RIGHT, p->n,
-                      &proc->weight_format);
-         tab_double (ssb->t, 4, i * 2 + j + 1, TAB_RIGHT, p->std_dev[j],
-                      NULL);
-         tab_double (ssb->t, 5, i * 2 + j + 1, TAB_RIGHT,
-                     p->std_dev[j] /sqrt (p->n), NULL);
-       }
-    }
-}
-
-/* Populate the one sample ssbox */
-static void
-ssbox_one_sample_populate (struct ssbox *ssb, struct t_test_proc *proc)
-{
-  int i;
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
-
-      tab_text (ssb->t, 0, i + 1, TAB_LEFT, var_get_name (proc->vars[i]));
-      tab_double (ssb->t, 1, i + 1, TAB_RIGHT, gs->n, &proc->weight_format);
-      tab_double (ssb->t, 2, i + 1, TAB_RIGHT, gs->mean, NULL);
-      tab_double (ssb->t, 3, i + 1, TAB_RIGHT, gs->std_dev, NULL);
-      tab_double (ssb->t, 4, i + 1, TAB_RIGHT, gs->se_mean, NULL);
-    }
-}
-\f
-/* Implementation of the Test Results box struct */
-
-static void trbox_base_init (struct trbox *, size_t n_vars, int cols);
-static void trbox_base_finalize (struct trbox *);
-static void trbox_independent_samples_init (struct trbox *,
-                                            struct t_test_proc *);
-static void trbox_independent_samples_populate (struct trbox *,
-                                                struct t_test_proc *);
-static void trbox_one_sample_init (struct trbox *, struct t_test_proc *);
-static void trbox_one_sample_populate (struct trbox *, struct t_test_proc *);
-static void trbox_paired_init (struct trbox *, struct t_test_proc *);
-static void trbox_paired_populate (struct trbox *, struct t_test_proc *);
-
-/* Create a trbox according to mode*/
-static void
-trbox_create (struct trbox *trb, struct t_test_proc *proc)
-{
-  switch (proc->mode)
-    {
-    case T_1_SAMPLE:
-      trbox_one_sample_init (trb, proc);
-      break;
-    case T_IND_SAMPLES:
-      trbox_independent_samples_init (trb, proc);
-      break;
-    case T_PAIRED:
-      trbox_paired_init (trb, proc);
-      break;
-    default:
-      NOT_REACHED ();
-    }
-}
-
-/* Populate a trbox according to proc */
-static void
-trbox_populate (struct trbox *trb, struct t_test_proc *proc)
-{
-  trb->populate (trb, proc);
-}
-
-/* Submit and destroy a trbox */
-static void
-trbox_finalize (struct trbox *trb)
-{
-  trb->finalize (trb);
-}
-
-/* Initialize the independent samples trbox */
-static void
-trbox_independent_samples_init (struct trbox *self,
-                                struct t_test_proc *proc)
-{
-  const int hsize = 11;
-  const int vsize = proc->n_vars * 2 + 3;
-
-  assert (self);
-  self->populate = trbox_independent_samples_populate;
-
-  trbox_base_init (self, proc->n_vars * 2, hsize);
-  tab_title (self->t, _("Independent Samples Test"));
-  tab_hline (self->t, TAL_1, 2, hsize - 1, 1);
-  tab_vline (self->t, TAL_2, 2, 0, vsize - 1);
-  tab_vline (self->t, TAL_1, 4, 0, vsize - 1);
-  tab_box (self->t, -1, -1, -1, TAL_1, 2, 1, hsize - 2, vsize - 1);
-  tab_hline (self->t, TAL_1, hsize - 2, hsize - 1, 2);
-  tab_box (self->t, -1, -1, -1, TAL_1, hsize - 2, 2, hsize - 1, vsize - 1);
-  tab_joint_text (self->t, 2, 0, 3, 0,
-                  TAB_CENTER, _("Levene's Test for Equality of Variances"));
-  tab_joint_text (self->t, 4, 0, hsize- 1, 0,
-                  TAB_CENTER, _("t-test for Equality of Means"));
-
-  tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("F"));
-  tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig."));
-  tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("t"));
-  tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("df"));
-  tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
-  tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference"));
-  tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Difference"));
-  tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
-  tab_text (self->t, 10, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
-
-  tab_joint_text_format (self->t, 9, 1, 10, 1, TAB_CENTER,
-                         _("%g%% Confidence Interval of the Difference"),
-                         proc->criteria * 100.0);
-}
-
-/* Populate the independent samples trbox */
-static void
-trbox_independent_samples_populate (struct trbox *self,
-                                    struct t_test_proc *proc)
-{
-  int i;
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      double p, q;
-
-      double t;
-      double df;
-
-      double df1, df2;
-
-      double pooled_variance;
-      double std_err_diff;
-      double mean_diff;
-
-      double se2;
-
-      const struct variable *var = proc->vars[i];
-      struct group_proc *grp_data = group_proc_get (var);
-
-      struct hsh_table *grp_hash = grp_data->group_hash;
-
-      struct group_statistics *gs0;
-      struct group_statistics *gs1;
-
-      union value search_val;
-
-      if (proc->criterion == CMP_LE)
-       search_val.f = proc->critical_value - 1.0;
-      else
-       search_val = proc->g_value[0];
-
-      gs0 = hsh_find (grp_hash, &search_val);
-      assert (gs0);
-
-      if (proc->criterion == CMP_LE)
-       search_val.f = proc->critical_value + 1.0;
-      else
-       search_val = proc->g_value[1];
-
-      gs1 = hsh_find (grp_hash, &search_val);
-      assert (gs1);
-
-
-      tab_text (self->t, 0, i * 2 + 3, TAB_LEFT, var_get_name (proc->vars[i]));
-      tab_text (self->t, 1, i * 2 + 3, TAB_LEFT, _("Equal variances assumed"));
-      tab_double (self->t, 2, i * 2 + 3, TAB_CENTER, grp_data->levene, NULL);
-
-      /* Now work out the significance of the Levene test */
-      df1 = 1;
-      df2 = grp_data->ugs.n - 2;
-      q = gsl_cdf_fdist_Q (grp_data->levene, df1, df2);
-      tab_double (self->t, 3, i * 2 + 3, TAB_CENTER, q, NULL);
-
-      df = gs0->n + gs1->n - 2.0;
-      tab_double (self->t, 5, i * 2 + 3, TAB_RIGHT, df, NULL);
-
-      pooled_variance = (gs0->n * pow2 (gs0->s_std_dev)
-                         + gs1->n *pow2 (gs1->s_std_dev)) / df ;
-
-      t = (gs0->mean - gs1->mean) / sqrt (pooled_variance);
-      t /= sqrt ((gs0->n + gs1->n) / (gs0->n * gs1->n));
-
-      tab_double (self->t, 4, i * 2 + 3, TAB_RIGHT, t, NULL);
-
-      p = gsl_cdf_tdist_P (t, df);
-      q = gsl_cdf_tdist_Q (t, df);
-
-      tab_double (self->t, 6, i * 2 + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p),
-                  NULL);
-
-      mean_diff = gs0->mean - gs1->mean;
-      tab_double (self->t, 7, i * 2 + 3, TAB_RIGHT, mean_diff, NULL);
-
-
-      std_err_diff = sqrt (pow2 (gs0->se_mean) + pow2 (gs1->se_mean));
-      tab_double (self->t, 8, i * 2 + 3, TAB_RIGHT, std_err_diff, NULL);
-
-      /* Now work out the confidence interval */
-      q = (1 - proc->criteria)/2.0;  /* 2-tailed test */
-
-      t = gsl_cdf_tdist_Qinv (q, df);
-      tab_double (self->t, 9, i * 2 + 3, TAB_RIGHT,
-                 mean_diff - t * std_err_diff, NULL);
-
-      tab_double (self->t, 10, i * 2 + 3, TAB_RIGHT,
-                 mean_diff + t * std_err_diff, NULL);
-
-
-      /* Now for the \sigma_1 != \sigma_2 case */
-      tab_text (self->t, 1, i * 2 + 3 + 1,
-                TAB_LEFT, _("Equal variances not assumed"));
-
-      se2 = ((pow2 (gs0->s_std_dev) / (gs0->n - 1)) +
-             (pow2 (gs1->s_std_dev) / (gs1->n - 1)));
-
-      t = mean_diff / sqrt (se2);
-      tab_double (self->t, 4, i * 2 + 3 + 1, TAB_RIGHT, t, NULL);
-
-      df = pow2 (se2) / ((pow2 (pow2 (gs0->s_std_dev) / (gs0->n - 1))
-                          / (gs0->n - 1))
-                         + (pow2 (pow2 (gs1->s_std_dev) / (gs1->n - 1))
-                            / (gs1->n - 1)));
-      tab_double (self->t, 5, i * 2 + 3 + 1, TAB_RIGHT, df, NULL);
-
-      p = gsl_cdf_tdist_P (t, df);
-      q = gsl_cdf_tdist_Q (t, df);
-
-      tab_double (self->t, 6, i * 2 + 3 + 1, TAB_RIGHT, 2.0 * (t > 0 ? q : p),
-                  NULL);
-
-      /* Now work out the confidence interval */
-      q = (1 - proc->criteria) / 2.0;  /* 2-tailed test */
-
-      t = gsl_cdf_tdist_Qinv (q, df);
-
-      tab_double (self->t, 7, i * 2 + 3 + 1, TAB_RIGHT, mean_diff, NULL);
-      tab_double (self->t, 8, i * 2 + 3 + 1, TAB_RIGHT, std_err_diff, NULL);
-      tab_double (self->t, 9, i * 2 + 3 + 1, TAB_RIGHT,
-                 mean_diff - t * std_err_diff, NULL);
-      tab_double (self->t, 10, i * 2 + 3 + 1, TAB_RIGHT,
-                 mean_diff + t * std_err_diff, NULL);
-    }
-}
-
-/* Initialize the paired samples trbox */
-static void
-trbox_paired_init (struct trbox *self, struct t_test_proc *proc)
-{
-  const int hsize=10;
-  const int vsize=proc->n_pairs+ 3;
-
-  self->populate = trbox_paired_populate;
-
-  trbox_base_init (self, proc->n_pairs, hsize);
-  tab_title (self->t, _("Paired Samples Test"));
-  tab_hline (self->t, TAL_1, 2, 6, 1);
-  tab_vline (self->t, TAL_2, 2, 0, vsize - 1);
-  tab_joint_text (self->t, 2, 0, 6, 0, TAB_CENTER, _("Paired Differences"));
-  tab_box (self->t, -1, -1, -1, TAL_1, 2, 1, 6, vsize - 1);
-  tab_box (self->t, -1, -1, -1, TAL_1, 6, 0, hsize - 1, vsize - 1);
-  tab_hline (self->t, TAL_1, 5, 6, 2);
-  tab_vline (self->t, TAL_GAP, 6, 0, 1);
-
-  tab_joint_text_format (self->t, 5, 1, 6, 1, TAB_CENTER,
-                         _("%g%% Confidence Interval of the Difference"),
-                         proc->criteria*100.0);
-
-  tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean"));
-  tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
-  tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean"));
-  tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
-  tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
-  tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("t"));
-  tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("df"));
-  tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
-}
-
-/* Populate the paired samples trbox */
-static void
-trbox_paired_populate (struct trbox *trb,
-                       struct t_test_proc *proc)
-{
-  int i;
-
-  for (i = 0; i < proc->n_pairs; i++)
-    {
-      struct pair *pair = &proc->pairs[i];
-      double p, q;
-      double se_mean;
-
-      double n = pair->n;
-      double t;
-      double df = n - 1;
-
-      tab_text_format (trb->t, 0, i + 3, TAB_LEFT, _("Pair %d"), i);
-      tab_text_format (trb->t, 1, i + 3, TAB_LEFT, "%s - %s",
-                       var_get_name (pair->v[0]),
-                       var_get_name (pair->v[1]));
-      tab_double (trb->t, 2, i + 3, TAB_RIGHT, pair->mean_diff, NULL);
-      tab_double (trb->t, 3, i + 3, TAB_RIGHT, pair->std_dev_diff, NULL);
-
-      /* SE Mean */
-      se_mean = pair->std_dev_diff / sqrt (n);
-      tab_double (trb->t, 4, i + 3, TAB_RIGHT, se_mean, NULL);
-
-      /* Now work out the confidence interval */
-      q = (1 - proc->criteria) / 2.0;  /* 2-tailed test */
-
-      t = gsl_cdf_tdist_Qinv (q, df);
-
-      tab_double (trb->t, 5, i + 3, TAB_RIGHT,
-                 pair->mean_diff - t * se_mean, NULL);
-      tab_double (trb->t, 6, i + 3, TAB_RIGHT,
-                 pair->mean_diff + t * se_mean, NULL);
-
-      t = ((pair->mean[0] - pair->mean[1])
-           / sqrt ((pow2 (pair->s_std_dev[0]) + pow2 (pair->s_std_dev[1])
-                    - (2 * pair->correlation
-                       * pair->s_std_dev[0] * pair->s_std_dev[1]))
-                   / (n - 1)));
-
-      tab_double (trb->t, 7, i + 3, TAB_RIGHT, t, NULL);
-
-      /* Degrees of freedom */
-      tab_double (trb->t, 8, i + 3, TAB_RIGHT, df, &proc->weight_format);
-
-      p = gsl_cdf_tdist_P (t,df);
-      q = gsl_cdf_tdist_Q (t,df);
-
-      tab_double (trb->t, 9, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL);
-    }
-}
-
-/* Initialize the one sample trbox */
-static void
-trbox_one_sample_init (struct trbox *self, struct t_test_proc *proc)
-{
-  const int hsize = 7;
-  const int vsize = proc->n_vars + 3;
-
-  self->populate = trbox_one_sample_populate;
-
-  trbox_base_init (self, proc->n_vars, hsize);
-  tab_title (self->t, _("One-Sample Test"));
-  tab_hline (self->t, TAL_1, 1, hsize - 1, 1);
-  tab_vline (self->t, TAL_2, 1, 0, vsize - 1);
-
-  tab_joint_text_format (self->t, 1, 0, hsize - 1, 0, TAB_CENTER,
-                         _("Test Value = %f"), proc->testval);
-
-  tab_box (self->t, -1, -1, -1, TAL_1, 1, 1, hsize - 1, vsize - 1);
-
-
-  tab_joint_text_format (self->t, 5, 1, 6, 1, TAB_CENTER,
-                         _("%g%% Confidence Interval of the Difference"),
-                         proc->criteria * 100.0);
-
-  tab_vline (self->t, TAL_GAP, 6, 1, 1);
-  tab_hline (self->t, TAL_1, 5, 6, 2);
-  tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _("t"));
-  tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("df"));
-  tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)"));
-  tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference"));
-  tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
-  tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
-}
-
-/* Populate the one sample trbox */
-static void
-trbox_one_sample_populate (struct trbox *trb, struct t_test_proc *proc)
-{
-  int i;
-
-  assert (trb->t);
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      double t;
-      double p, q;
-      double df;
-      struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
-
-      tab_text (trb->t, 0, i + 3, TAB_LEFT, var_get_name (proc->vars[i]));
-
-      t = (gs->mean - proc->testval) * sqrt (gs->n) / gs->std_dev;
-
-      tab_double (trb->t, 1, i + 3, TAB_RIGHT, t, NULL);
-
-      /* degrees of freedom */
-      df = gs->n - 1;
-
-      tab_double (trb->t, 2, i + 3, TAB_RIGHT, df, &proc->weight_format);
-
-      p = gsl_cdf_tdist_P (t, df);
-      q = gsl_cdf_tdist_Q (t, df);
-
-      /* Multiply by 2 to get 2-tailed significance, makeing sure we've got
-        the correct tail*/
-      tab_double (trb->t, 3, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL);
-      tab_double (trb->t, 4, i + 3, TAB_RIGHT, gs->mean_diff, NULL);
-
-
-      q = (1 - proc->criteria) / 2.0;  /* 2-tailed test */
-      t = gsl_cdf_tdist_Qinv (q, df);
-
-      tab_double (trb->t, 5, i + 3, TAB_RIGHT,
-                gs->mean_diff - t * gs->se_mean, NULL);
-      tab_double (trb->t, 6, i + 3, TAB_RIGHT,
-                gs->mean_diff + t * gs->se_mean, NULL);
-    }
-}
-
-/* Base initializer for the generalized trbox */
-static void
-trbox_base_init (struct trbox *self, size_t data_rows, int cols)
-{
-  const size_t rows = 3 + data_rows;
-
-  self->finalize = trbox_base_finalize;
-  self->t = tab_create (cols, rows);
-  tab_headers (self->t, 0, 0, 3, 0);
-  tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
-  tab_hline (self->t, TAL_2, 0, cols- 1, 3);
-}
-
-/* Base finalizer for the trbox */
-static void
-trbox_base_finalize (struct trbox *trb)
-{
-  tab_submit (trb->t);
-}
-
-/* Create, populate and submit the Paired Samples Correlation box */
-static void
-pscbox (struct t_test_proc *proc)
-{
-  const int rows=1+proc->n_pairs;
-  const int cols=5;
-  int i;
-
-  struct tab_table *table;
-
-  table = tab_create (cols, rows);
-
-  tab_headers (table, 0, 0, 1, 0);
-  tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
-  tab_hline (table, TAL_2, 0, cols - 1, 1);
-  tab_vline (table, TAL_2, 2, 0, rows - 1);
-  tab_title (table, _("Paired Samples Correlations"));
-
-  /* column headings */
-  tab_text (table, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
-  tab_text (table, 3, 0, TAB_CENTER | TAT_TITLE, _("Correlation"));
-  tab_text (table, 4, 0, TAB_CENTER | TAT_TITLE, _("Sig."));
-
-  for (i = 0; i < proc->n_pairs; i++)
-    {
-      struct pair *pair = &proc->pairs[i];
-
-      /* row headings */
-      tab_text_format (table, 0, i + 1, TAB_LEFT | TAT_TITLE,
-                       _("Pair %d"), i);
-      tab_text_format (table, 1, i + 1, TAB_LEFT | TAT_TITLE,
-                       _("%s & %s"),
-                       var_get_name (pair->v[0]),
-                       var_get_name (pair->v[1]));
-
-      /* row data */
-      tab_double (table, 2, i + 1, TAB_RIGHT, pair->n, &proc->weight_format);
-      tab_double (table, 3, i + 1, TAB_RIGHT, pair->correlation, NULL);
-
-      tab_double (table, 4, i + 1, TAB_RIGHT, 
-                 2.0 * significance_of_correlation (pair->correlation, pair->n), NULL);
-    }
-
-  tab_submit (table);
-}
-\f
-/* Calculation Implementation */
-
-/* Calculations common to all variants of the T test. */
-static void
-common_calc (const struct dictionary *dict,
-             struct t_test_proc *proc,
-             struct casereader *reader)
-{
-  struct ccase *c;
-  int i;
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
-      gs->sum = 0;
-      gs->n = 0;
-      gs->ssq = 0;
-      gs->sum_diff = 0;
-    }
-
-  for (; (c = casereader_read (reader)) != NULL; case_unref (c))
-    {
-      double weight = dict_get_case_weight (dict, c, NULL);
-
-      /* Listwise has to be implicit if the independent variable
-         is missing ?? */
-      if (proc->mode == T_IND_SAMPLES)
-        {
-          if (var_is_value_missing (proc->indep_var,
-                                    case_data (c, proc->indep_var),
-                                    proc->exclude))
-            continue;
-        }
-
-      for (i = 0; i < proc->n_vars; i++)
-        {
-          const struct variable *v = proc->vars[i];
-          const union value *val = case_data (c, v);
-
-          if (!var_is_value_missing (v, val, proc->exclude))
-            {
-              struct group_statistics *gs;
-              gs = &group_proc_get (v)->ugs;
-
-              gs->n += weight;
-              gs->sum += weight * val->f;
-              gs->ssq += weight * pow2 (val->f);
-            }
-        }
-    }
-  casereader_destroy (reader);
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
-
-      gs->mean = gs->sum / gs->n;
-      gs->s_std_dev = sqrt (((gs->ssq / gs->n) - pow2 (gs->mean)));
-      gs->std_dev = sqrt (gs->n / (gs->n- 1)
-                          * ((gs->ssq / gs->n) - pow2 (gs->mean)));
-      gs->se_mean = gs->std_dev / sqrt (gs->n);
-      gs->mean_diff = gs->sum_diff / gs->n;
-    }
-}
-
-/* Calculations for one sample T test. */
-static int
-one_sample_calc (const struct dictionary *dict, struct t_test_proc *proc,
-                 struct casereader *reader)
-{
-  struct ccase *c;
-  int i;
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
-      gs->sum_diff = 0;
-    }
-
-  for (; (c = casereader_read (reader)) != NULL; case_unref (c))
-    {
-      double weight = dict_get_case_weight (dict, c, NULL);
-      for (i = 0; i < proc->n_vars; i++)
-        {
-          const struct variable *v = proc->vars[i];
-          struct group_statistics *gs = &group_proc_get (v)->ugs;
-          const union value *val = case_data (c, v);
-          if (!var_is_value_missing (v, val, proc->exclude))
-            gs->sum_diff += weight * (val->f - proc->testval);
-        }
-    }
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs;
-      gs->mean_diff = gs->sum_diff / gs->n;
-    }
-
-  casereader_destroy (reader);
-
-  return 0;
-}
-
-static int
-paired_calc (const struct dictionary *dict, struct t_test_proc *proc,
-             struct casereader *reader)
-{
-  struct ccase *c;
-  int i;
-
-  for (i = 0; i < proc->n_pairs; i++)
-    {
-      struct pair *pair = &proc->pairs[i];
-      pair->n = 0;
-      pair->sum[0] = pair->sum[1] = 0;
-      pair->ssq[0] = pair->ssq[1] = 0;
-      pair->sum_of_prod = 0;
-      pair->correlation = 0;
-      pair->sum_of_diffs = 0;
-      pair->ssq_diffs = 0;
-    }
-
-  for (; (c = casereader_read (reader)) != NULL; case_unref (c))
-    {
-      double weight = dict_get_case_weight (dict, c, NULL);
-      for (i = 0; i < proc->n_pairs; i++)
-        {
-          struct pair *pair = &proc->pairs[i];
-          const struct variable *v0 = pair->v[0];
-          const struct variable *v1 = pair->v[1];
-
-          const union value *val0 = case_data (c, v0);
-          const union value *val1 = case_data (c, v1);
-
-          if (!var_is_value_missing (v0, val0, proc->exclude)
-              && !var_is_value_missing (v1, val1, proc->exclude))
-            {
-              pair->n += weight;
-              pair->sum[0] += weight * val0->f;
-              pair->sum[1] += weight * val1->f;
-              pair->ssq[0] += weight * pow2 (val0->f);
-              pair->ssq[1] += weight * pow2 (val1->f);
-              pair->sum_of_prod += weight * val0->f * val1->f;
-              pair->sum_of_diffs += weight * (val0->f - val1->f);
-              pair->ssq_diffs += weight * pow2 (val0->f - val1->f);
-            }
-        }
-    }
-
-  for (i = 0; i < proc->n_pairs; i++)
-    {
-      struct pair *pair = &proc->pairs[i];
-      const double n = pair->n;
-      int j;
-
-      for (j=0; j < 2; j++)
-       {
-         pair->mean[j] = pair->sum[j] / n;
-         pair->s_std_dev[j] = sqrt ((pair->ssq[j] / n
-                                      - pow2 (pair->mean[j])));
-         pair->std_dev[j] = sqrt (n / (n- 1) * (pair->ssq[j] / n
-                                                - pow2 (pair->mean[j])));
-       }
-
-      pair->correlation = (pair->sum_of_prod / pair->n
-                           - pair->mean[0] * pair->mean[1]);
-      /* correlation now actually contains the covariance */
-      pair->correlation /= pair->std_dev[0] * pair->std_dev[1];
-      pair->correlation *= pair->n / (pair->n - 1);
-
-      pair->mean_diff = pair->sum_of_diffs / n;
-      pair->std_dev_diff = sqrt (n / (n - 1) * ((pair->ssq_diffs / n)
-                                                - pow2 (pair->mean_diff)));
-    }
-
-  casereader_destroy (reader);
-  return 0;
-}
-
-static int
-group_calc (const struct dictionary *dict, struct t_test_proc *proc,
-            struct casereader *reader)
-{
-  struct ccase *c;
-  int i;
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      struct group_proc *ttpr = group_proc_get (proc->vars[i]);
-      int j;
-
-      /* There's always 2 groups for a T - TEST */
-      ttpr->n_groups = 2;
-      ttpr->group_hash = hsh_create (2,
-                                     (hsh_compare_func *) compare_group_binary,
-                                     (hsh_hash_func *) hash_group_binary,
-                                     (hsh_free_func *) free_group,
-                                     proc);
-
-      for (j = 0; j < 2; j++)
-       {
-         struct group_statistics *gs = xmalloc (sizeof *gs);
-         gs->sum = 0;
-         gs->n = 0;
-         gs->ssq = 0;
-         if (proc->criterion == CMP_EQ)
-            gs->id = proc->g_value[j];
-         else
-           {
-             if (j == 0)
-               gs->id.f = proc->critical_value - 1.0;
-             else
-               gs->id.f = proc->critical_value + 1.0;
-           }
-
-         hsh_insert (ttpr->group_hash, gs);
-       }
-    }
-
-  for (; (c = casereader_read (reader)) != NULL; case_unref (c))
-    {
-      const double weight = dict_get_case_weight (dict, c, NULL);
-      const union value *gv;
-
-      if (var_is_value_missing (proc->indep_var,
-                                case_data (c, proc->indep_var), proc->exclude))
-        continue;
-
-      gv = case_data (c, proc->indep_var);
-      for (i = 0; i < proc->n_vars; i++)
-        {
-          const struct variable *var = proc->vars[i];
-          const union value *val = case_data (c, var);
-          struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
-          struct group_statistics *gs = hsh_find (grp_hash, gv);
-
-          /* If the independent variable doesn't match either of the values
-             for this case then move on to the next case. */
-          if (gs == NULL)
-            break;
-
-          if (!var_is_value_missing (var, val, proc->exclude))
-            {
-              gs->n += weight;
-              gs->sum += weight * val->f;
-              gs->ssq += weight * pow2 (val->f);
-            }
-        }
-    }
-
-  for (i = 0; i < proc->n_vars; i++)
-    {
-      const struct variable *var = proc->vars[i];
-      struct hsh_table *grp_hash = group_proc_get (var)->group_hash;
-      struct hsh_iterator g;
-      struct group_statistics *gs;
-      int count = 0;
-
-      for (gs = hsh_first (grp_hash, &g); gs != NULL;
-          gs = hsh_next (grp_hash, &g))
-       {
-         gs->mean = gs->sum / gs->n;
-         gs->s_std_dev = sqrt (((gs->ssq / gs->n) - pow2 (gs->mean)));
-         gs->std_dev = sqrt (gs->n / (gs->n- 1)
-                              * ((gs->ssq / gs->n) - pow2 (gs->mean)));
-         gs->se_mean = gs->std_dev / sqrt (gs->n);
-         count++;
-       }
-      assert (count == 2);
-    }
-
-  casereader_destroy (reader);
-
-  return 0;
-}
-
-
-static bool
-is_criteria_value (const struct ccase *c, void *aux)
-{
-  const struct t_test_proc *proc = aux;
-  const union value *val = case_data (c, proc->indep_var);
-  int width = var_get_width (proc->indep_var);
-
-  if ( value_equal (val, &proc->g_value[0], width))
-    return true;
-
-  if ( value_equal (val, &proc->g_value[1], width))
-    return true;
-
-  return false;
-}
-
-static void
-calculate (struct t_test_proc *proc,
-           struct casereader *input, const struct dataset *ds)
-{
-  const struct dictionary *dict = dataset_dict (ds);
-  struct ssbox stat_summary_box;
-  struct trbox test_results_box;
-  struct taint *taint;
-  struct ccase *c;
-  int i;
-  c = casereader_peek (input, 0);
-  if (c == NULL)
-    {
-      casereader_destroy (input);
-      return;
-    }
-  output_split_file_values (ds, c);
-  case_unref (c);
-
-  if (proc->listwise_missing)
-    input = casereader_create_filter_missing (input,
-                                              proc->vars,
-                                              proc->n_vars,
-                                              proc->exclude, NULL, NULL);
-  input = casereader_create_filter_weight (input, dict, NULL, NULL);
-  taint = taint_clone (casereader_get_taint (input));
-
-  common_calc (dict, proc, casereader_clone (input));
-  switch (proc->mode)
-    {
-    case T_1_SAMPLE:
-      one_sample_calc (dict, proc, input);
-      break;
-    case T_PAIRED:
-      paired_calc (dict, proc, input);
-      break;
-    case T_IND_SAMPLES:
-      group_calc (dict, proc, casereader_clone (input));
-
-      for (i = 0; i < proc->n_vars; ++i)
-       {
-         struct group_proc *grp_data = group_proc_get (proc->vars[i]);
-
-         if ( proc->criterion == CMP_EQ )
-           {
-             input = casereader_create_filter_func (input, is_criteria_value, NULL,
-                                                    proc, 
-                                                    NULL);
-           }
-
-         grp_data->levene = levene ( input, proc->indep_var, proc->vars[i], dict_get_weight (dict), proc->exclude);
-       }
-      break;
-    default:
-      NOT_REACHED ();
-    }
-
-  if (!taint_has_tainted_successor (taint))
-    {
-      ssbox_create (&stat_summary_box, proc);
-      ssbox_populate (&stat_summary_box, proc);
-      ssbox_finalize (&stat_summary_box);
-
-      if (proc->mode == T_PAIRED)
-        pscbox (proc);
-
-      trbox_create (&test_results_box, proc);
-      trbox_populate (&test_results_box, proc);
-      trbox_finalize (&test_results_box);
-    }
-
-  taint_destroy (taint);
-}
-
-/* return 0 if G belongs to group 0,
-          1 if it belongs to group 1,
-          2 if it belongs to neither group */
-static int
-which_group (const struct group_statistics *g,
-             const struct t_test_proc *proc)
-{
-  int width = var_get_width (proc->indep_var);
-
-  if (0 == value_compare_3way (&g->id, &proc->g_value[0], width))
-    return 0;
-
-  if (0 == value_compare_3way (&g->id, &proc->g_value[1], width))
-    return 1;
-
-  return 2;
-}
-
-/* Return -1 if the id of a is less than b; +1 if greater than and
-   0 if equal */
-static int
-compare_group_binary (const struct group_statistics *a,
-                      const struct group_statistics *b,
-                      const struct t_test_proc *proc)
-{
-  int flag_a;
-  int flag_b;
-
-  if (proc->criterion == CMP_LE)
-    {
-      flag_a = (a->id.f < proc->critical_value);
-      flag_b = (b->id.f < proc->critical_value);
-    }
-  else
-    {
-      flag_a = which_group (a, proc);
-      flag_b = which_group (b, proc);
-    }
-
-  if (flag_a < flag_b)
-    return - 1;
-
-  return (flag_a > flag_b);
-}
-
-/* This is a degenerate case of a hash, since it can only return three possible
-   values.  It's really a comparison, being used as a hash function */
-
-static unsigned
-hash_group_binary (const struct group_statistics *g,
-                   const struct t_test_proc *proc)
-{
-  return (proc->criterion == CMP_LE
-          ? g->id.f < proc->critical_value
-          : which_group (g, proc));
-}
-
-/*
-  Local Variables:
-  mode: c
-  End:
-*/
diff --git a/src/math/levene.c b/src/math/levene.c

index 13fad93e52107ebe5a16ca826fbbedb06dbf8928..fe04d294e1acfbf214dadea6a53717ccc9a37646 100644 (file)
--- a/src/math/levene.c
+++ b/src/math/levene.c
@@ -16,165 +16,245 @@
  
  #include <config.h>
  
-#include "math/levene.h"
-
+#include "levene.h"
  #include <math.h>
  
-#include "data/case.h"
-#include "data/casereader.h"
-#include "data/variable.h"
-#include "libpspp/hmap.h"
  #include "libpspp/misc.h"
+#include "libpspp/hmap.h"
+#include "data/value.h"
+
+#include <gl/xalloc.h>
+#include <assert.h>
  
  struct lev
  {
    struct hmap_node node;
    union value group;
-  int width;
  
    double t_bar;
    double z_mean;
    double n;
  };
  
+typedef unsigned int hash_func (const struct levene *, const union value *v);
+typedef bool cmp_func (const struct levene *, const union value *v0, const union value *v1);
+
+struct levene
+{
+  /* Width of the categorical variable */
+  int gvw ;
+
+  /* The value deviding the the groups. Valid only for dichotomous categorical variable.*/
+  const union value *cutpoint;
+
+
+  /* A hashtable of struct lev objects indexed by union value */
+  struct hmap hmap;
+
+  hash_func *hash;
+  cmp_func *cmp;
+
+
+  /* A state variable indicating how many passes have been done */
+  int pass;
+
+  double grand_n;
+  double z_grand_mean;
+
+  double denominator;
+};
+
+
+static unsigned int
+unique_hash (const struct levene *nl, const union value *val)
+{
+  return value_hash (val, nl->gvw, 0);
+}
+
+static bool
+unique_cmp (const struct levene *nl, const union value *val0, const union value *val1)
+{
+  return value_equal (val0, val1, nl->gvw);
+}
+
+static unsigned int
+cutpoint_hash (const struct levene *nl, const union value *val)
+{
+  int x = value_compare_3way (val, nl->cutpoint, nl->gvw);
+
+  return (x < 0);
+}
+
+static bool
+cutpoint_cmp (const struct levene *nl, const union value *val0, const union value *val1)
+{
+  int x = value_compare_3way (val0, nl->cutpoint, nl->gvw);
+
+  int y = value_compare_3way (val1, nl->cutpoint, nl->gvw);
+
+  if ( x == 0) x = 1;
+  if ( y == 0) y = 1;
+
+  return ( x == y);
+}
+
+
  
  static struct lev *
-find_group (struct hmap *map, const union value *target, int width)
+find_group (const struct levene *nl, const union value *target)
  {
    struct lev *l = NULL;
-  unsigned int hash = value_hash (target, width, 0);
-  HMAP_FOR_EACH_WITH_HASH (l, struct lev, node, hash, map)
+
+  HMAP_FOR_EACH_WITH_HASH (l, struct lev, node, nl->hash (nl, target), &nl->hmap)
      {
-      if (value_equal (&l->group, target, width))
+      if (nl->cmp (nl, &l->group, target))
         break;
        l = NULL;
      }
-
    return l;
  }
  
  
-double
-levene (struct casereader *rx, const struct variable *gvar,
-       const struct variable *var, const struct variable *wv,
-       enum mv_class exclude)
+struct levene *
+levene_create (int indep_width, const union value *cutpoint)
  {
-  double numerator = 0.0;
-  double denominator = 0.0;
-  int n_groups = 0;
-  double z_grand_mean = 0.0;
-  double grand_n = 0.0;
+  struct levene *nl = xzalloc (sizeof *nl);
+
+  hmap_init (&nl->hmap);
+
+  nl->gvw = indep_width;
+  nl->cutpoint = cutpoint;
  
-  struct hmap map = HMAP_INITIALIZER (map);
+  nl->hash  = cutpoint ? cutpoint_hash : unique_hash;
+  nl->cmp   = cutpoint ? cutpoint_cmp : unique_cmp;
+
+  return nl;
+}
  
-  struct ccase *c;
-  struct casereader *r = casereader_clone (rx);
  
-  for (; (c = casereader_read (r)) != NULL; case_unref (c))
+/* Data accumulation. First pass */
+void 
+levene_pass_one (struct levene *nl, double value, double weight, const union value *gv)
+{
+  struct lev *lev = find_group (nl, gv);
+
+  if ( nl->pass == 0 ) 
+    {
+      nl->pass = 1;
+    }
+  assert (nl->pass == 1);
+
+  if ( NULL == lev)
      {
-      struct lev *l = NULL;
-      const union value *target = case_data (c, gvar);
-      int width = var_get_width (gvar);
-      unsigned int hash = value_hash (target, width, 0);
-      const double x = case_data (c, var)->f;
-      const double weight = wv ? case_data (c, wv)->f : 1.0;
-
-      if (var_is_value_missing (var, case_data (c, var), exclude))
-       continue;
-
-      l = find_group (&map, target, width);
-      
-      if (l == NULL)
-       {
-         l = xzalloc (sizeof *l);
-         value_clone (&l->group, target, width);
-         hmap_insert (&map, &l->node, hash);
-       }
-
-      l->n += weight;
-      l->t_bar += x * weight;
-      grand_n += weight;
+      struct lev *l = xzalloc (sizeof *l);
+      value_clone (&l->group, gv, nl->gvw);
+      hmap_insert (&nl->hmap, &l->node, nl->hash (nl, &l->group));
+      lev = l;
      }
-  casereader_destroy (r);
  
-  {
-    struct lev *l;
-    HMAP_FOR_EACH (l, struct lev, node, &map)
+  lev->n += weight;
+  lev->t_bar += value * weight;
+
+  nl->grand_n += weight;
+}
+
+/* Data accumulation. Second pass */
+void 
+levene_pass_two (struct levene *nl, double value, double weight, const union value *gv)
+{
+  struct lev *lev = NULL;
+
+  if ( nl->pass == 1 )
+    {
+      struct lev *next;
+      struct lev *l;
+
+      nl->pass = 2;
+
+      HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
        {
         l->t_bar /= l->n;
        }
-  }
+    }
+  assert (nl->pass == 2);
  
-  n_groups = hmap_count (&map);
+  lev = find_group (nl, gv);
  
-  r = casereader_clone (rx);
-  for (; (c = casereader_read (r)) != NULL; case_unref (c))
+  lev->z_mean += fabs (value - lev->t_bar) * weight;
+  nl->z_grand_mean += fabs (value - lev->t_bar) * weight;
+}
+
+/* Data accumulation. Third pass */
+void 
+levene_pass_three (struct levene *nl, double value, double weight, const union value *gv)
+{
+  double z;
+  struct lev *lev = NULL;
+
+  if ( nl->pass == 2 )
      {
-      struct lev *l = NULL;
-      const union value *target = case_data (c, gvar);
-      int width = var_get_width (gvar);
-      const double x = case_data (c, var)->f;
-      const double weight = wv ? case_data (c, wv)->f : 1.0;
-
-      if (var_is_value_missing (var, case_data (c, var), exclude))
-       continue;
-
-      l = find_group (&map, target, width);
-      assert (l);
-      
-      l->z_mean += fabs (x - l->t_bar) * weight;
-      z_grand_mean += fabs (x - l->t_bar) * weight;
-    }
-  casereader_destroy (r);
+      struct lev *next;
+      struct lev *l;
+
+      nl->pass = 3;
  
-  {
-    struct lev *l;
-    HMAP_FOR_EACH (l, struct lev, node, &map)
+      HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
        {
         l->z_mean /= l->n;
        }
+
+      nl->z_grand_mean /= nl->grand_n;
    }
  
-  z_grand_mean /= grand_n;
+  assert (nl->pass == 3);
+  lev = find_group (nl, gv);
+
+  z = fabs (value - lev->t_bar);
+  nl->denominator += pow2 (z - lev->z_mean) * weight;
+}
+
  
-  r = casereader_clone (rx);
-  for (; (c = casereader_read (r)) != NULL; case_unref (c))
+/* Return the value of the levene statistic */
+double
+levene_calculate (struct levene *nl)
+{
+  struct lev *next;
+  struct lev *l;
+
+  double numerator = 0.0;
+  double nn = 0.0;
+  if ( nl->pass == 3 )
      {
-      double z;
-      struct lev *l;
-      const union value *target = case_data (c, gvar);
-      int width = var_get_width (gvar);
-      const double x = case_data (c, var)->f;
-      const double weight = wv ? case_data (c, wv)->f : 1.0;
+      nl->pass = 4;
+    }
  
-      if (var_is_value_missing (var, case_data (c, var), exclude))
-       continue;
+  assert (nl->pass == 4);
  
-      l = find_group (&map, target, width);
-      assert (l);
+  nl->denominator *= hmap_count (&nl->hmap) - 1;
  
-      z = fabs (x - l->t_bar);
-      denominator += pow2 (z - l->z_mean) * weight;
+  HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
+    {
+      numerator += l->n * pow2 (l->z_mean - nl->z_grand_mean);
+      nn += l->n;
      }
-  casereader_destroy (r);
  
-  denominator *= n_groups - 1;
+  numerator *= nn - hmap_count (&nl->hmap);
+    
+  return numerator / nl->denominator;
+}
  
-  {
-    double grand_n = 0.0;
-    struct lev *next;
-    struct lev *l;
-    HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &map)
-      {
-       numerator += l->n * pow2 (l->z_mean - z_grand_mean);
-       grand_n += l->n;
+void
+levene_destroy (struct levene *nl)
+{
+  struct lev *next;
+  struct lev *l;
  
-       /* We don't need these anymore */
-       free (l);
-      }
-    numerator *= grand_n - n_groups ;
-    hmap_destroy (&map);
-  }
+  HMAP_FOR_EACH_SAFE (l, next, struct lev, node, &nl->hmap)
+    {
+      value_destroy (&l->group, nl->gvw);
+      free (l);
+    }
  
-  return numerator/ denominator;
+  hmap_destroy (&nl->hmap);
+  free (nl);
  }
diff --git a/src/math/levene.h b/src/math/levene.h

index c5e8bc3b4e9a15a49bc97d6837bb680cc06ceb4b..4351ee092a127db7c1e9a82d562dc94eec1c6e1c 100644 (file)
--- a/src/math/levene.h
+++ b/src/math/levene.h
@@ -1,5 +1,5 @@
  /* PSPP - a program for statistical analysis.
-   Copyright (C) 2004 Free Software Foundation, Inc.
+   Copyright (C) 2004, 2011 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -17,15 +17,18 @@
  #if !levene_h
  #define levene_h 1
  
-struct casereader;
-struct variable;
+struct nl;
  
+union value;
  
-enum mv_class;
+struct levene *levene_create (int indep_width, const union value *cutpoint);
  
-double
-levene (struct casereader *rx, const struct variable *gvar,
-       const struct variable *var, const struct variable *wv,  enum mv_class exclude );
+void levene_pass_one (struct levene *, double value, double weight, const union value *gv);
+void levene_pass_two (struct levene *, double value, double weight, const union value *gv);
+void levene_pass_three (struct levene *, double value, double weight, const union value *gv);
  
+double levene_calculate (struct levene*);
+
+void levene_destroy (struct levene*);
  
  #endif
diff --git a/tests/language/stats/t-test.at b/tests/language/stats/t-test.at

index 90b7ba9e594e7efc9c3d731af72402ed75323870..b97db81fd6e983879a96a2f899a60bb4bbecb8d8 100644 (file)
--- a/tests/language/stats/t-test.at
+++ b/tests/language/stats/t-test.at
@@ -231,10 +231,10 @@ DEP2,F8.0
  
  Table: Group Statistics
  ,INDEP,N,Mean,Std. Deviation,S.E. Mean
-DEP1,1.1,5,2.00,.71,.32
-,2.1,5,4.00,.71,.32
-DEP2,1.1,5,4.00,.71,.32
-,2.1,5,2.00,.71,.32
+DEP1,1.10,5,2.00,.71,.32
+,2.10,5,4.00,.71,.32
+DEP2,1.10,5,4.00,.71,.32
+,2.10,5,2.00,.71,.32
  
  Table: Independent Samples Test
  ,,Levene's Test for Equality of Variances,,t-test for Equality of Means,,,,,,
@@ -285,15 +285,15 @@ DEP,F8.0
  
  Table: Group Statistics
  ,INDEP,N,Mean,Std. Deviation,S.E. Mean
-DEP,>=1.514,11,9.00,3.82,1.15
-,<1.514,11,8.00,2.86,.86
+DEP,>=    1.51,11,9.00,3.82,1.15
+,<    1.51,11,8.00,2.86,.86
  
  Table: Independent Samples Test
  ,,Levene's Test for Equality of Variances,,t-test for Equality of Means,,,,,,
  ,,,,,,,,,95% Confidence Interval of the Difference,
  ,,F,Sig.,t,df,Sig. (2-tailed),Mean Difference,Std. Error Difference,Lower,Upper
-DEP,Equal variances assumed,.17,.68,-.69,20.00,.50,-1.00,1.44,-4.00,2.00
-,Equal variances not assumed,,,-.69,18.54,.50,-1.00,1.44,-4.02,2.02
+DEP,Equal variances assumed,.17,.68,.69,20.00,.50,1.00,1.44,-2.00,4.00
+,Equal variances not assumed,,,.69,18.54,.50,1.00,1.44,-2.02,4.02
  ])
  AT_CLEANUP
  
@@ -320,10 +320,10 @@ dep2,F8.0
  
  Table: Group Statistics
  ,indep,N,Mean,Std. Deviation,S.E. Mean
-dep1,1,3,2.50,.87,.50
-,2,2,3.25,.35,.25
-dep2,1,3,5.00,1.00,.58
-,2,2,2.00,1.41,1.00
+dep1,1.00,3,2.50,.87,.50
+,2.00,2,3.25,.35,.25
+dep2,1.00,3,5.00,1.00,.58
+,2.00,2,2.00,1.41,1.00
  
  Table: Independent Samples Test
  ,,Levene's Test for Equality of Variances,,t-test for Equality of Means,,,,,,
@@ -381,10 +381,10 @@ dep2,F8.0
  
  Table: Group Statistics
  ,indep,N,Mean,Std. Deviation,S.E. Mean
-dep1,1,3,2.50,.87,.50
-,2,3,3.50,.50,.29
-dep2,1,3,5.00,1.00,.58
-,2,3,2.00,1.00,.58
+dep1,1.00,3,2.50,.87,.50
+,2.00,3,3.50,.50,.29
+dep2,1.00,3,5.00,1.00,.58
+,2.00,3,2.00,1.00,.58
  
  Table: Independent Samples Test
  ,,Levene's Test for Equality of Variances,,t-test for Equality of Means,,,,,,
@@ -576,8 +576,8 @@ x,F8.0
  
  Table: Group Statistics
  ,ind,N,Mean,Std. Deviation,S.E. Mean
-x,1,3,2.50,.87,.50
-,2,3,3.50,.50,.29
+x,1.00,3,2.50,.87,.50
+,2.00,3,3.50,.50,.29
  
  Table: Independent Samples Test
  ,,Levene's Test for Equality of Variances,,t-test for Equality of Means,,,,,,
author	John Darrington <john@darrington.wattle.id.au>
	Fri, 1 Jul 2011 14:47:33 +0000 (16:47 +0200)
committer	John Darrington <john@darrington.wattle.id.au>
	Fri, 1 Jul 2011 14:47:33 +0000 (16:47 +0200)
src/language/stats/automake.mk		patch \| blob \| history
src/language/stats/oneway.c		patch \| blob \| history
src/language/stats/t-test-indep.c	[new file with mode: 0644]	patch \| blob
src/language/stats/t-test-one-sample.c	[new file with mode: 0644]	patch \| blob
src/language/stats/t-test-paired.c	[new file with mode: 0644]	patch \| blob
src/language/stats/t-test-parser.c	[new file with mode: 0644]	patch \| blob
src/language/stats/t-test.h	[new file with mode: 0644]	patch \| blob
src/language/stats/t-test.q	[deleted file]	patch \| blob \| history
src/math/levene.c		patch \| blob \| history
src/math/levene.h		patch \| blob \| history
tests/language/stats/t-test.at		patch \| blob \| history