X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Ft-test.q;h=3f3fa5d6e0d795f64940def95c82e62fec40705e;hb=d723af7eca95b73a618ae5fe831f380239550a23;hp=a3f4cf8f522516d810ae29948a8402becd01c636;hpb=5c3291dc396b795696e94f47780308fd7ace6fc4;p=pspp-builds.git diff --git a/src/language/stats/t-test.q b/src/language/stats/t-test.q index a3f4cf8f..3f3fa5d6 100644 --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -42,10 +43,11 @@ #include #include #include -#include -#include +#include +#include #include +#include "minmax.h" #include "xalloc.h" #include "xmemdup0.h" @@ -126,8 +128,6 @@ struct t_test_proc union value g_value[2]; /* CMP_EQ only: Per-group indep var values. */ }; -static int parse_value (struct lexer *, union value *, int width); - /* Statistics Summary Box */ struct ssbox { @@ -275,7 +275,7 @@ tts_custom_groups (struct lexer *lexer, struct dataset *ds, int n_values; int width; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); proc->indep_var = parse_variable (lexer, dataset_dict (ds)); if (proc->indep_var == NULL) @@ -287,19 +287,19 @@ tts_custom_groups (struct lexer *lexer, struct dataset *ds, value_init (&proc->g_value[0], width); value_init (&proc->g_value[1], width); - if (!lex_match (lexer, '(')) + if (!lex_match (lexer, T_LPAREN)) n_values = 0; else { if (!parse_value (lexer, &proc->g_value[0], width)) return 0; - lex_match (lexer, ','); - if (lex_match (lexer, ')')) + lex_match (lexer, T_COMMA); + if (lex_match (lexer, T_RPAREN)) n_values = 1; else { if (!parse_value (lexer, &proc->g_value[1], width) - || !lex_force_match (lexer, ')')) + || !lex_force_match (lexer, T_RPAREN)) return 0; n_values = 2; } @@ -355,7 +355,7 @@ tts_custom_pairs (struct lexer *lexer, struct dataset *ds, size_t n_total_pairs; size_t i, j; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (!parse_variables_const (lexer, dataset_dict (ds), &vars1, &n_vars1, PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) @@ -370,9 +370,9 @@ tts_custom_pairs (struct lexer *lexer, struct dataset *ds, return 0; } - if (lex_match (lexer, '(') + if (lex_match (lexer, T_LPAREN) && lex_match_id (lexer, "PAIRED") - && lex_match (lexer, ')')) + && lex_match (lexer, T_RPAREN)) { paired = true; if (n_vars1 != n_vars2) @@ -421,29 +421,6 @@ tts_custom_pairs (struct lexer *lexer, struct dataset *ds, free (vars2); return 1; } - -/* Parses the current token (numeric or string, depending on type) - value v and returns success. */ -static int -parse_value (struct lexer *lexer, union value *v, int width) -{ - if (width == 0) - { - if (!lex_force_num (lexer)) - return 0; - v->f = lex_tokval (lexer); - } - else - { - if (!lex_force_string (lexer)) - return 0; - value_copy_str_rpad (v, width, ds_cstr (lex_tokstr (lexer)), ' '); - } - - lex_get (lexer); - - return 1; -} /* Implementation of the SSBOX object. */ @@ -499,13 +476,11 @@ static void ssbox_base_init (struct ssbox *this, int cols, int rows) { this->finalize = ssbox_base_finalize; - this->t = tab_create (cols, rows, 0); + this->t = tab_create (cols, rows); - tab_columns (this->t, SOM_COL_DOWN, 1); tab_headers (this->t, 0, 0, 1, 0); tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); tab_hline (this->t, TAL_2, 0, cols- 1, 1); - tab_dim (this->t, tab_natural_dimensions, NULL); } /* ssbox implementations. */ @@ -530,7 +505,7 @@ ssbox_one_sample_init (struct ssbox *this, struct t_test_proc *proc) tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); - tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean")); } /* Initialize the independent samples ssbox */ @@ -550,7 +525,7 @@ ssbox_independent_samples_init (struct ssbox *this, struct t_test_proc *proc) tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); - tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean")); } /* Populate the ssbox for independent samples */ @@ -601,10 +576,10 @@ ssbox_independent_samples_populate (struct ssbox *ssb, tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT, var_get_name (proc->vars[i])); - tab_text (ssb->t, 1, i * 2 + 1, TAB_LEFT | TAT_PRINTF, - "%s%s", prefix[0], val_lab[0]); - tab_text (ssb->t, 1, i * 2 + 1+ 1, TAB_LEFT | TAT_PRINTF, - "%s%s", prefix[1], val_lab[1]); + tab_text_format (ssb->t, 1, i * 2 + 1, TAB_LEFT, + "%s%s", prefix[0], val_lab[0]); + tab_text_format (ssb->t, 1, i * 2 + 1+ 1, TAB_LEFT, + "%s%s", prefix[1], val_lab[1]); /* Fill in the group statistics */ for (count = 0; count < 2; count++) @@ -649,7 +624,7 @@ ssbox_paired_init (struct ssbox *this, struct t_test_proc *proc) tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N")); tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); - tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("S.E. Mean")); } /* Populate the ssbox for paired values */ @@ -663,7 +638,7 @@ ssbox_paired_populate (struct ssbox *ssb, struct t_test_proc *proc) struct pair *p = &proc->pairs[i]; int j; - tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT | TAT_PRINTF, _("Pair %d"), i); + tab_text_format (ssb->t, 0, i * 2 + 1, TAB_LEFT, _("Pair %d"), i); for (j=0; j < 2; j++) { /* Titles */ @@ -781,9 +756,9 @@ trbox_independent_samples_init (struct trbox *self, tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Lower")); tab_text (self->t, 10, 2, TAB_CENTER | TAT_TITLE, _("Upper")); - tab_joint_text (self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF, - _("%g%% Confidence Interval of the Difference"), - proc->criteria * 100.0); + tab_joint_text_format (self->t, 9, 1, 10, 1, TAB_CENTER, + _("%g%% Confidence Interval of the Difference"), + proc->criteria * 100.0); } /* Populate the independent samples trbox */ @@ -935,9 +910,9 @@ trbox_paired_init (struct trbox *self, struct t_test_proc *proc) tab_hline (self->t, TAL_1, 5, 6, 2); tab_vline (self->t, TAL_GAP, 6, 0, 1); - tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, - _("%g%% Confidence Interval of the Difference"), - proc->criteria*100.0); + tab_joint_text_format (self->t, 5, 1, 6, 1, TAB_CENTER, + _("%g%% Confidence Interval of the Difference"), + proc->criteria*100.0); tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); @@ -966,10 +941,10 @@ trbox_paired_populate (struct trbox *trb, double t; double df = n - 1; - tab_text (trb->t, 0, i + 3, TAB_LEFT | TAT_PRINTF, _("Pair %d"), i); - tab_text (trb->t, 1, i + 3, TAB_LEFT | TAT_PRINTF, "%s - %s", - var_get_name (pair->v[0]), - var_get_name (pair->v[1])); + tab_text_format (trb->t, 0, i + 3, TAB_LEFT, _("Pair %d"), i); + tab_text_format (trb->t, 1, i + 3, TAB_LEFT, "%s - %s", + var_get_name (pair->v[0]), + var_get_name (pair->v[1])); tab_double (trb->t, 2, i + 3, TAB_RIGHT, pair->mean_diff, NULL); tab_double (trb->t, 3, i + 3, TAB_RIGHT, pair->std_dev_diff, NULL); @@ -998,8 +973,8 @@ trbox_paired_populate (struct trbox *trb, /* Degrees of freedom */ tab_double (trb->t, 8, i + 3, TAB_RIGHT, df, &proc->weight_format); - p = gsl_cdf_tdist_P (t, df); - q = gsl_cdf_tdist_P (t, df); + p = gsl_cdf_tdist_P (t,df); + q = gsl_cdf_tdist_Q (t,df); tab_double (trb->t, 9, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL); } @@ -1019,15 +994,15 @@ trbox_one_sample_init (struct trbox *self, struct t_test_proc *proc) tab_hline (self->t, TAL_1, 1, hsize - 1, 1); tab_vline (self->t, TAL_2, 1, 0, vsize - 1); - tab_joint_text (self->t, 1, 0, hsize - 1, 0, TAB_CENTER | TAT_PRINTF, - _("Test Value = %f"), proc->testval); + tab_joint_text_format (self->t, 1, 0, hsize - 1, 0, TAB_CENTER, + _("Test Value = %f"), proc->testval); tab_box (self->t, -1, -1, -1, TAL_1, 1, 1, hsize - 1, vsize - 1); - tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, - _("%g%% Confidence Interval of the Difference"), - proc->criteria * 100.0); + tab_joint_text_format (self->t, 5, 1, 6, 1, TAB_CENTER, + _("%g%% Confidence Interval of the Difference"), + proc->criteria * 100.0); tab_vline (self->t, TAL_GAP, 6, 1, 1); tab_hline (self->t, TAL_1, 5, 6, 2); @@ -1091,11 +1066,10 @@ trbox_base_init (struct trbox *self, size_t data_rows, int cols) const size_t rows = 3 + data_rows; self->finalize = trbox_base_finalize; - self->t = tab_create (cols, rows, 0); + self->t = tab_create (cols, rows); tab_headers (self->t, 0, 0, 3, 0); tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1); tab_hline (self->t, TAL_2, 0, cols- 1, 3); - tab_dim (self->t, tab_natural_dimensions, NULL); } /* Base finalizer for the trbox */ @@ -1115,14 +1089,12 @@ pscbox (struct t_test_proc *proc) struct tab_table *table; - table = tab_create (cols, rows, 0); + table = tab_create (cols, rows); - tab_columns (table, SOM_COL_DOWN, 1); tab_headers (table, 0, 0, 1, 0); tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); tab_hline (table, TAL_2, 0, cols - 1, 1); tab_vline (table, TAL_2, 2, 0, rows - 1); - tab_dim (table, tab_natural_dimensions, NULL); tab_title (table, _("Paired Samples Correlations")); /* column headings */ @@ -1133,27 +1105,21 @@ pscbox (struct t_test_proc *proc) for (i = 0; i < proc->n_pairs; i++) { struct pair *pair = &proc->pairs[i]; - double p, q; - double df = pair->n -2; - double correlation_t = (pair->correlation * sqrt (df) / - sqrt (1 - pow2 (pair->correlation))); /* row headings */ - tab_text (table, 0, i + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _("Pair %d"), i); - tab_text (table, 1, i + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _("%s & %s"), - var_get_name (pair->v[0]), - var_get_name (pair->v[1])); + tab_text_format (table, 0, i + 1, TAB_LEFT | TAT_TITLE, + _("Pair %d"), i); + tab_text_format (table, 1, i + 1, TAB_LEFT | TAT_TITLE, + _("%s & %s"), + var_get_name (pair->v[0]), + var_get_name (pair->v[1])); /* row data */ tab_double (table, 2, i + 1, TAB_RIGHT, pair->n, &proc->weight_format); tab_double (table, 3, i + 1, TAB_RIGHT, pair->correlation, NULL); - p = gsl_cdf_tdist_P (correlation_t, df); - q = gsl_cdf_tdist_Q (correlation_t, df); - tab_double (table, 4, i + 1, TAB_RIGHT, - 2.0 * (correlation_t > 0 ? q : p), NULL); + tab_double (table, 4, i + 1, TAB_RIGHT, + 2.0 * significance_of_correlation (pair->correlation, pair->n), NULL); } tab_submit (table); @@ -1435,6 +1401,23 @@ group_calc (const struct dictionary *dict, struct t_test_proc *proc, return 0; } + +static bool +is_criteria_value (const struct ccase *c, void *aux) +{ + const struct t_test_proc *proc = aux; + const union value *val = case_data (c, proc->indep_var); + int width = var_get_width (proc->indep_var); + + if ( value_equal (val, &proc->g_value[0], width)) + return true; + + if ( value_equal (val, &proc->g_value[1], width)) + return true; + + return false; +} + static void calculate (struct t_test_proc *proc, struct casereader *input, const struct dataset *ds) @@ -1444,7 +1427,7 @@ calculate (struct t_test_proc *proc, struct trbox test_results_box; struct taint *taint; struct ccase *c; - + int i; c = casereader_peek (input, 0); if (c == NULL) { @@ -1473,8 +1456,20 @@ calculate (struct t_test_proc *proc, break; case T_IND_SAMPLES: group_calc (dict, proc, casereader_clone (input)); - levene (dict, input, proc->indep_var, proc->n_vars, proc->vars, - proc->exclude); + + for (i = 0; i < proc->n_vars; ++i) + { + struct group_proc *grp_data = group_proc_get (proc->vars[i]); + + if ( proc->criterion == CMP_EQ ) + { + input = casereader_create_filter_func (input, is_criteria_value, NULL, + proc, + NULL); + } + + grp_data->levene = levene ( input, proc->indep_var, proc->vars[i], dict_get_weight (dict), proc->exclude); + } break; default: NOT_REACHED ();