X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ft-test.q;h=4b082fe7c710fb670c565cc3421ee998ff01284e;hb=2be9bee9da6a2ce27715e58128569594319abfa2;hp=3b810f640270d63e0cb51748ff75bd3bda5aeb28;hpb=59981a5060a0e672b98655be240886d89a513d31;p=pspp-builds.git diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q index 3b810f64..4b082fe7 100644 --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,33 +22,34 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xalloc.h" -#include "xmemdup0.h" +#include "data/case.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/value-labels.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/dictionary/split-file.h" +#include "language/lexer/lexer.h" +#include "language/lexer/value-parser.h" +#include "libpspp/array.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/hash.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" +#include "libpspp/taint.h" +#include "math/correlation.h" +#include "math/group-proc.h" +#include "math/levene.h" +#include "output/tab.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" +#include "gl/xmemdup0.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -163,6 +164,8 @@ static int compare_group_binary (const struct group_statistics *a, static unsigned hash_group_binary (const struct group_statistics *g, const struct t_test_proc *p); +static void t_test_proc_destroy (struct t_test_proc *proc); + int cmd_t_test (struct lexer *lexer, struct dataset *ds) { @@ -188,7 +191,7 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds) { msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands " "must be specified.")); - goto done; + goto error; } proc.mode = (cmd.sbc_testval ? T_1_SAMPLE @@ -208,7 +211,7 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds) if (cmd.sbc_variables) { msg (SE, _("VARIABLES subcommand may not be used with PAIRS.")); - goto done; + goto error; } /* Fill proc.vars with the unique variables from pairs. */ @@ -227,7 +230,7 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds) if (!cmd.n_variables) { msg (SE, _("One or more VARIABLES must be specified.")); - goto done; + goto error; } proc.n_vars = cmd.n_variables; proc.vars = cmd.v_variables; @@ -239,31 +242,33 @@ cmd_t_test (struct lexer *lexer, struct dataset *ds) while (casegrouper_get_next_group (grouper, &group)) calculate (&proc, group, ds); ok = casegrouper_destroy (grouper); + + /* Free 'proc' then commit the procedure. Must happen in this order because + if proc->indep_var was created by a temporary transformation then + committing will destroy it. */ + t_test_proc_destroy (&proc); ok = proc_commit (ds) && ok; - if (proc.mode == T_IND_SAMPLES) - { - int v; - /* Destroy any group statistics we created */ - for (v = 0; v < proc.n_vars; v++) - { - struct group_proc *grpp = group_proc_get (proc.vars[v]); - hsh_destroy (grpp->group_hash); - } - } + return ok ? CMD_SUCCESS : CMD_FAILURE; -done: +error: free_t_test (&cmd); parse_failed: - if (proc.indep_var != NULL) + t_test_proc_destroy (&proc); + return CMD_FAILURE; +} + +static void +t_test_proc_destroy (struct t_test_proc *proc) +{ + if (proc->indep_var != NULL) { - int width = var_get_width (proc.indep_var); - value_destroy (&proc.g_value[0], width); - value_destroy (&proc.g_value[1], width); + int width = var_get_width (proc->indep_var); + value_destroy (&proc->g_value[0], width); + value_destroy (&proc->g_value[1], width); } - free (proc.vars); - free (proc.pairs); - return ok ? CMD_SUCCESS : CMD_FAILURE; + free (proc->vars); + free (proc->pairs); } static int @@ -274,7 +279,7 @@ tts_custom_groups (struct lexer *lexer, struct dataset *ds, int n_values; int width; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); proc->indep_var = parse_variable (lexer, dataset_dict (ds)); if (proc->indep_var == NULL) @@ -286,19 +291,19 @@ tts_custom_groups (struct lexer *lexer, struct dataset *ds, value_init (&proc->g_value[0], width); value_init (&proc->g_value[1], width); - if (!lex_match (lexer, '(')) + if (!lex_match (lexer, T_LPAREN)) n_values = 0; else { if (!parse_value (lexer, &proc->g_value[0], width)) return 0; - lex_match (lexer, ','); - if (lex_match (lexer, ')')) + lex_match (lexer, T_COMMA); + if (lex_match (lexer, T_RPAREN)) n_values = 1; else { if (!parse_value (lexer, &proc->g_value[1], width) - || !lex_force_match (lexer, ')')) + || !lex_force_match (lexer, T_RPAREN)) return 0; n_values = 2; } @@ -354,7 +359,7 @@ tts_custom_pairs (struct lexer *lexer, struct dataset *ds, size_t n_total_pairs; size_t i, j; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (!parse_variables_const (lexer, dataset_dict (ds), &vars1, &n_vars1, PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) @@ -369,9 +374,9 @@ tts_custom_pairs (struct lexer *lexer, struct dataset *ds, return 0; } - if (lex_match (lexer, '(') + if (lex_match (lexer, T_LPAREN) && lex_match_id (lexer, "PAIRED") - && lex_match (lexer, ')')) + && lex_match (lexer, T_RPAREN)) { paired = true; if (n_vars1 != n_vars2) @@ -475,13 +480,11 @@ static void ssbox_base_init (struct ssbox *this, int cols, int rows) { this->finalize = ssbox_base_finalize; - this->t = tab_create (cols, rows, 0); + this->t = tab_create (cols, rows); - tab_columns (this->t, SOM_COL_DOWN); tab_headers (this->t, 0, 0, 1, 0); tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); tab_hline (this->t, TAL_2, 0, cols- 1, 1); - tab_dim (this->t, tab_natural_dimensions, NULL, NULL); } /* ssbox implementations. */ @@ -506,7 +509,7 @@ ssbox_one_sample_init (struct ssbox *this, struct t_test_proc *proc) tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); - tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean")); } /* Initialize the independent samples ssbox */ @@ -526,7 +529,7 @@ ssbox_independent_samples_init (struct ssbox *this, struct t_test_proc *proc) tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); - tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean")); } /* Populate the ssbox for independent samples */ @@ -625,7 +628,7 @@ ssbox_paired_init (struct ssbox *this, struct t_test_proc *proc) tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); - tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean")); } /* Populate the ssbox for paired values */ @@ -974,8 +977,8 @@ trbox_paired_populate (struct trbox *trb, /* Degrees of freedom */ tab_double (trb->t, 8, i + 3, TAB_RIGHT, df, &proc->weight_format); - p = gsl_cdf_tdist_P (t, df); - q = gsl_cdf_tdist_P (t, df); + p = gsl_cdf_tdist_P (t,df); + q = gsl_cdf_tdist_Q (t,df); tab_double (trb->t, 9, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL); } @@ -1067,11 +1070,10 @@ trbox_base_init (struct trbox *self, size_t data_rows, int cols) const size_t rows = 3 + data_rows; self->finalize = trbox_base_finalize; - self->t = tab_create (cols, rows, 0); + self->t = tab_create (cols, rows); tab_headers (self->t, 0, 0, 3, 0); tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1); tab_hline (self->t, TAL_2, 0, cols- 1, 3); - tab_dim (self->t, tab_natural_dimensions, NULL, NULL); } /* Base finalizer for the trbox */ @@ -1091,14 +1093,12 @@ pscbox (struct t_test_proc *proc) struct tab_table *table; - table = tab_create (cols, rows, 0); + table = tab_create (cols, rows); - tab_columns (table, SOM_COL_DOWN); tab_headers (table, 0, 0, 1, 0); tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); tab_hline (table, TAL_2, 0, cols - 1, 1); tab_vline (table, TAL_2, 2, 0, rows - 1); - tab_dim (table, tab_natural_dimensions, NULL, NULL); tab_title (table, _("Paired Samples Correlations")); /* column headings */ @@ -1109,10 +1109,6 @@ pscbox (struct t_test_proc *proc) for (i = 0; i < proc->n_pairs; i++) { struct pair *pair = &proc->pairs[i]; - double p, q; - double df = pair->n -2; - double correlation_t = (pair->correlation * sqrt (df) / - sqrt (1 - pow2 (pair->correlation))); /* row headings */ tab_text_format (table, 0, i + 1, TAB_LEFT | TAT_TITLE, @@ -1126,10 +1122,8 @@ pscbox (struct t_test_proc *proc) tab_double (table, 2, i + 1, TAB_RIGHT, pair->n, &proc->weight_format); tab_double (table, 3, i + 1, TAB_RIGHT, pair->correlation, NULL); - p = gsl_cdf_tdist_P (correlation_t, df); - q = gsl_cdf_tdist_Q (correlation_t, df); - tab_double (table, 4, i + 1, TAB_RIGHT, - 2.0 * (correlation_t > 0 ? q : p), NULL); + tab_double (table, 4, i + 1, TAB_RIGHT, + 2.0 * significance_of_correlation (pair->correlation, pair->n), NULL); } tab_submit (table); @@ -1411,6 +1405,23 @@ group_calc (const struct dictionary *dict, struct t_test_proc *proc, return 0; } + +static bool +is_criteria_value (const struct ccase *c, void *aux) +{ + const struct t_test_proc *proc = aux; + const union value *val = case_data (c, proc->indep_var); + int width = var_get_width (proc->indep_var); + + if ( value_equal (val, &proc->g_value[0], width)) + return true; + + if ( value_equal (val, &proc->g_value[1], width)) + return true; + + return false; +} + static void calculate (struct t_test_proc *proc, struct casereader *input, const struct dataset *ds) @@ -1420,7 +1431,7 @@ calculate (struct t_test_proc *proc, struct trbox test_results_box; struct taint *taint; struct ccase *c; - + int i; c = casereader_peek (input, 0); if (c == NULL) { @@ -1449,8 +1460,20 @@ calculate (struct t_test_proc *proc, break; case T_IND_SAMPLES: group_calc (dict, proc, casereader_clone (input)); - levene (dict, input, proc->indep_var, proc->n_vars, proc->vars, - proc->exclude); + + for (i = 0; i < proc->n_vars; ++i) + { + struct group_proc *grp_data = group_proc_get (proc->vars[i]); + + if ( proc->criterion == CMP_EQ ) + { + input = casereader_create_filter_func (input, is_criteria_value, NULL, + proc, + NULL); + } + + grp_data->levene = levene ( input, proc->indep_var, proc->vars[i], dict_get_weight (dict), proc->exclude); + } break; default: NOT_REACHED ();