/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <stdio.h>
#include <stdlib.h>
-#include <data/case.h>
-#include <data/casegrouper.h>
-#include <data/casereader.h>
-#include <data/dictionary.h>
-#include <data/procedure.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
-#include <language/command.h>
-#include <language/dictionary/split-file.h>
-#include <language/lexer/lexer.h>
-#include <libpspp/array.h>
-#include <libpspp/assertion.h>
-#include <libpspp/compiler.h>
-#include <libpspp/hash.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/str.h>
-#include <libpspp/taint.h>
-#include <math/group-proc.h>
-#include <math/levene.h>
-#include <output/manager.h>
-#include <output/table.h>
-#include <data/format.h>
-
-#include "xalloc.h"
-#include "xmemdup0.h"
+#include "data/case.h"
+#include "data/casegrouper.h"
+#include "data/casereader.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/value-labels.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/dictionary/split-file.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/value-parser.h"
+#include "libpspp/array.h"
+#include "libpspp/assertion.h"
+#include "libpspp/compiler.h"
+#include "libpspp/hash.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "libpspp/str.h"
+#include "libpspp/taint.h"
+#include "math/correlation.h"
+#include "math/group-proc.h"
+#include "math/levene.h"
+#include "output/tab.h"
+
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
+#include "gl/xmemdup0.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
union value g_value[2]; /* CMP_EQ only: Per-group indep var values. */
};
-static int parse_value (struct lexer *, union value *, int width);
-
/* Statistics Summary Box */
struct ssbox
{
static unsigned hash_group_binary (const struct group_statistics *g,
const struct t_test_proc *p);
+static void t_test_proc_destroy (struct t_test_proc *proc);
+
int
cmd_t_test (struct lexer *lexer, struct dataset *ds)
{
{
msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands "
"must be specified."));
- goto done;
+ goto error;
}
proc.mode = (cmd.sbc_testval ? T_1_SAMPLE
if (cmd.sbc_variables)
{
msg (SE, _("VARIABLES subcommand may not be used with PAIRS."));
- goto done;
+ goto error;
}
/* Fill proc.vars with the unique variables from pairs. */
if (!cmd.n_variables)
{
msg (SE, _("One or more VARIABLES must be specified."));
- goto done;
+ goto error;
}
proc.n_vars = cmd.n_variables;
proc.vars = cmd.v_variables;
while (casegrouper_get_next_group (grouper, &group))
calculate (&proc, group, ds);
ok = casegrouper_destroy (grouper);
+
+ /* Free 'proc' then commit the procedure. Must happen in this order because
+ if proc->indep_var was created by a temporary transformation then
+ committing will destroy it. */
+ t_test_proc_destroy (&proc);
ok = proc_commit (ds) && ok;
- if (proc.mode == T_IND_SAMPLES)
- {
- int v;
- /* Destroy any group statistics we created */
- for (v = 0; v < proc.n_vars; v++)
- {
- struct group_proc *grpp = group_proc_get (proc.vars[v]);
- hsh_destroy (grpp->group_hash);
- }
- }
+ return ok ? CMD_SUCCESS : CMD_FAILURE;
-done:
+error:
free_t_test (&cmd);
parse_failed:
- if (proc.indep_var != NULL)
+ t_test_proc_destroy (&proc);
+ return CMD_FAILURE;
+}
+/* Releases the resources held inside PROC.  When PROC->indep_var is set,
+   the two group values PROC->g_value[0] and PROC->g_value[1] are destroyed
+   using that variable's width; the 'vars' and 'pairs' arrays are then
+   freed.  PROC itself is not freed. */
+static void
+t_test_proc_destroy (struct t_test_proc *proc)
+{
+ if (proc->indep_var != NULL)
{
- int width = var_get_width (proc.indep_var);
- value_destroy (&proc.g_value[0], width);
- value_destroy (&proc.g_value[1], width);
+ int width = var_get_width (proc->indep_var);
+ value_destroy (&proc->g_value[0], width);
+ value_destroy (&proc->g_value[1], width);
}
- free (proc.vars);
- free (proc.pairs);
- return ok ? CMD_SUCCESS : CMD_FAILURE;
+ free (proc->vars);
+ free (proc->pairs);
}
static int
int n_values;
int width;
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
proc->indep_var = parse_variable (lexer, dataset_dict (ds));
if (proc->indep_var == NULL)
value_init (&proc->g_value[0], width);
value_init (&proc->g_value[1], width);
- if (!lex_match (lexer, '('))
+ if (!lex_match (lexer, T_LPAREN))
n_values = 0;
else
{
- if (!parse_value (lexer, &proc->g_value[0], width))
+ if (!parse_value (lexer, &proc->g_value[0], proc->indep_var))
return 0;
- lex_match (lexer, ',');
- if (lex_match (lexer, ')'))
+ lex_match (lexer, T_COMMA);
+ if (lex_match (lexer, T_RPAREN))
n_values = 1;
else
{
- if (!parse_value (lexer, &proc->g_value[1], width)
- || !lex_force_match (lexer, ')'))
+ if (!parse_value (lexer, &proc->g_value[1], proc->indep_var)
+ || !lex_force_match (lexer, T_RPAREN))
return 0;
n_values = 2;
}
size_t n_total_pairs;
size_t i, j;
- lex_match (lexer, '=');
+ lex_match (lexer, T_EQUALS);
if (!parse_variables_const (lexer, dataset_dict (ds), &vars1, &n_vars1,
PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH))
return 0;
}
- if (lex_match (lexer, '(')
+ if (lex_match (lexer, T_LPAREN)
&& lex_match_id (lexer, "PAIRED")
- && lex_match (lexer, ')'))
+ && lex_match (lexer, T_RPAREN))
{
paired = true;
if (n_vars1 != n_vars2)
free (vars2);
return 1;
}
-
-/* Parses the current token (numeric or string, depending on type)
- value v and returns success. */
-static int
-parse_value (struct lexer *lexer, union value *v, int width)
-{
- if (width == 0)
- {
- if (!lex_force_num (lexer))
- return 0;
- v->f = lex_tokval (lexer);
- }
- else
- {
- if (!lex_force_string (lexer))
- return 0;
- value_copy_str_rpad (v, width, ds_cstr (lex_tokstr (lexer)), ' ');
- }
-
- lex_get (lexer);
-
- return 1;
-}
\f
/* Implementation of the SSBOX object. */
ssbox_base_init (struct ssbox *this, int cols, int rows)
{
this->finalize = ssbox_base_finalize;
- this->t = tab_create (cols, rows, 0);
+ this->t = tab_create (cols, rows);
- tab_columns (this->t, SOM_COL_DOWN, 1);
tab_headers (this->t, 0, 0, 1, 0);
tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
tab_hline (this->t, TAL_2, 0, cols- 1, 1);
- tab_dim (this->t, tab_natural_dimensions, NULL);
}
\f
/* ssbox implementations. */
tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N"));
tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
- tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
+ tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
}
/* Initialize the independent samples ssbox */
tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N"));
tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
- tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
+ tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
}
/* Populate the ssbox for independent samples */
tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT,
var_get_name (proc->vars[i]));
- tab_text (ssb->t, 1, i * 2 + 1, TAB_LEFT | TAT_PRINTF,
- "%s%s", prefix[0], val_lab[0]);
- tab_text (ssb->t, 1, i * 2 + 1+ 1, TAB_LEFT | TAT_PRINTF,
- "%s%s", prefix[1], val_lab[1]);
+ tab_text_format (ssb->t, 1, i * 2 + 1, TAB_LEFT,
+ "%s%s", prefix[0], val_lab[0]);
+ tab_text_format (ssb->t, 1, i * 2 + 1+ 1, TAB_LEFT,
+ "%s%s", prefix[1], val_lab[1]);
/* Fill in the group statistics */
for (count = 0; count < 2; count++)
tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean"));
tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N"));
tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
- tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean"));
+ tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean"));
}
/* Populate the ssbox for paired values */
struct pair *p = &proc->pairs[i];
int j;
- tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT | TAT_PRINTF, _("Pair %d"), i);
+ tab_text_format (ssb->t, 0, i * 2 + 1, TAB_LEFT, _("Pair %d"), i);
for (j=0; j < 2; j++)
{
/* Titles */
tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Lower"));
tab_text (self->t, 10, 2, TAB_CENTER | TAT_TITLE, _("Upper"));
- tab_joint_text (self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF,
- _("%g%% Confidence Interval of the Difference"),
- proc->criteria * 100.0);
+ tab_joint_text_format (self->t, 9, 1, 10, 1, TAB_CENTER,
+ _("%g%% Confidence Interval of the Difference"),
+ proc->criteria * 100.0);
}
/* Populate the independent samples trbox */
tab_hline (self->t, TAL_1, 5, 6, 2);
tab_vline (self->t, TAL_GAP, 6, 0, 1);
- tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF,
- _("%g%% Confidence Interval of the Difference"),
- proc->criteria*100.0);
+ tab_joint_text_format (self->t, 5, 1, 6, 1, TAB_CENTER,
+ _("%g%% Confidence Interval of the Difference"),
+ proc->criteria*100.0);
tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean"));
tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation"));
double t;
double df = n - 1;
- tab_text (trb->t, 0, i + 3, TAB_LEFT | TAT_PRINTF, _("Pair %d"), i);
- tab_text (trb->t, 1, i + 3, TAB_LEFT | TAT_PRINTF, "%s - %s",
- var_get_name (pair->v[0]),
- var_get_name (pair->v[1]));
+ tab_text_format (trb->t, 0, i + 3, TAB_LEFT, _("Pair %d"), i);
+ tab_text_format (trb->t, 1, i + 3, TAB_LEFT, "%s - %s",
+ var_get_name (pair->v[0]),
+ var_get_name (pair->v[1]));
tab_double (trb->t, 2, i + 3, TAB_RIGHT, pair->mean_diff, NULL);
tab_double (trb->t, 3, i + 3, TAB_RIGHT, pair->std_dev_diff, NULL);
/* Degrees of freedom */
tab_double (trb->t, 8, i + 3, TAB_RIGHT, df, &proc->weight_format);
- p = gsl_cdf_tdist_P (t, df);
- q = gsl_cdf_tdist_P (t, df);
+ p = gsl_cdf_tdist_P (t,df);
+ q = gsl_cdf_tdist_Q (t,df);
tab_double (trb->t, 9, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL);
}
tab_hline (self->t, TAL_1, 1, hsize - 1, 1);
tab_vline (self->t, TAL_2, 1, 0, vsize - 1);
- tab_joint_text (self->t, 1, 0, hsize - 1, 0, TAB_CENTER | TAT_PRINTF,
- _("Test Value = %f"), proc->testval);
+ tab_joint_text_format (self->t, 1, 0, hsize - 1, 0, TAB_CENTER,
+ _("Test Value = %f"), proc->testval);
tab_box (self->t, -1, -1, -1, TAL_1, 1, 1, hsize - 1, vsize - 1);
- tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF,
- _("%g%% Confidence Interval of the Difference"),
- proc->criteria * 100.0);
+ tab_joint_text_format (self->t, 5, 1, 6, 1, TAB_CENTER,
+ _("%g%% Confidence Interval of the Difference"),
+ proc->criteria * 100.0);
tab_vline (self->t, TAL_GAP, 6, 1, 1);
tab_hline (self->t, TAL_1, 5, 6, 2);
const size_t rows = 3 + data_rows;
self->finalize = trbox_base_finalize;
- self->t = tab_create (cols, rows, 0);
+ self->t = tab_create (cols, rows);
tab_headers (self->t, 0, 0, 3, 0);
tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1);
tab_hline (self->t, TAL_2, 0, cols- 1, 3);
- tab_dim (self->t, tab_natural_dimensions, NULL);
}
/* Base finalizer for the trbox */
struct tab_table *table;
- table = tab_create (cols, rows, 0);
+ table = tab_create (cols, rows);
- tab_columns (table, SOM_COL_DOWN, 1);
tab_headers (table, 0, 0, 1, 0);
tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1);
tab_hline (table, TAL_2, 0, cols - 1, 1);
tab_vline (table, TAL_2, 2, 0, rows - 1);
- tab_dim (table, tab_natural_dimensions, NULL);
tab_title (table, _("Paired Samples Correlations"));
/* column headings */
for (i = 0; i < proc->n_pairs; i++)
{
struct pair *pair = &proc->pairs[i];
- double p, q;
- double df = pair->n -2;
- double correlation_t = (pair->correlation * sqrt (df) /
- sqrt (1 - pow2 (pair->correlation)));
/* row headings */
- tab_text (table, 0, i + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
- _("Pair %d"), i);
- tab_text (table, 1, i + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF,
- _("%s & %s"),
- var_get_name (pair->v[0]),
- var_get_name (pair->v[1]));
+ tab_text_format (table, 0, i + 1, TAB_LEFT | TAT_TITLE,
+ _("Pair %d"), i);
+ tab_text_format (table, 1, i + 1, TAB_LEFT | TAT_TITLE,
+ _("%s & %s"),
+ var_get_name (pair->v[0]),
+ var_get_name (pair->v[1]));
/* row data */
tab_double (table, 2, i + 1, TAB_RIGHT, pair->n, &proc->weight_format);
tab_double (table, 3, i + 1, TAB_RIGHT, pair->correlation, NULL);
- p = gsl_cdf_tdist_P (correlation_t, df);
- q = gsl_cdf_tdist_Q (correlation_t, df);
- tab_double (table, 4, i + 1, TAB_RIGHT,
- 2.0 * (correlation_t > 0 ? q : p), NULL);
+ tab_double (table, 4, i + 1, TAB_RIGHT,
+ 2.0 * significance_of_correlation (pair->correlation, pair->n), NULL);
}
tab_submit (table);
return 0;
}
+/* Case predicate handed to casereader_create_filter_func in calculate().
+   AUX is the struct t_test_proc in use.  Returns true if case C's value
+   of the independent variable equals either of the two group values
+   g_value[0] or g_value[1] (compared at the variable's width), false
+   otherwise. */
+static bool
+is_criteria_value (const struct ccase *c, void *aux)
+{
+ const struct t_test_proc *proc = aux;
+ const union value *val = case_data (c, proc->indep_var);
+ int width = var_get_width (proc->indep_var);
+
+ if ( value_equal (val, &proc->g_value[0], width))
+ return true;
+
+ if ( value_equal (val, &proc->g_value[1], width))
+ return true;
+
+ return false;
+}
+
static void
calculate (struct t_test_proc *proc,
struct casereader *input, const struct dataset *ds)
struct trbox test_results_box;
struct taint *taint;
struct ccase *c;
-
+ int i;
c = casereader_peek (input, 0);
if (c == NULL)
{
break;
case T_IND_SAMPLES:
group_calc (dict, proc, casereader_clone (input));
- levene (dict, input, proc->indep_var, proc->n_vars, proc->vars,
- proc->exclude);
+
+ for (i = 0; i < proc->n_vars; ++i)
+ {
+ struct group_proc *grp_data = group_proc_get (proc->vars[i]);
+
+ if ( proc->criterion == CMP_EQ )
+ {
+ input = casereader_create_filter_func (input, is_criteria_value, NULL,
+ proc,
+ NULL);
+ }
+
+ grp_data->levene = levene ( input, proc->indep_var, proc->vars[i], dict_get_weight (dict), proc->exclude);
+ }
break;
default:
NOT_REACHED ();