From 566bb87d7bab018ba7eff48c61fa11c41ce675fa Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 23 Oct 2010 10:58:40 +0200 Subject: [PATCH] First attempt at the Friedman test --- src/language/stats/automake.mk | 1 + src/language/stats/friedman.c | 293 +++++++++++++++++++++++++++++++++ src/language/stats/friedman.h | 34 ++++ src/language/stats/npar.c | 55 ++++++- 4 files changed, 379 insertions(+), 4 deletions(-) create mode 100644 src/language/stats/friedman.c create mode 100644 src/language/stats/friedman.h diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk index cf01a73c..55952622 100644 --- a/src/language/stats/automake.mk +++ b/src/language/stats/automake.mk @@ -20,6 +20,7 @@ language_stats_sources = \ src/language/stats/factor.c \ src/language/stats/flip.c \ src/language/stats/freq.c src/language/stats/freq.h \ + src/language/stats/friedman.c src/language/stats/friedman.h \ src/language/stats/glm.c \ src/language/stats/kruskal-wallis.c src/language/stats/kruskal-wallis.h \ src/language/stats/npar.c src/language/stats/npar.h \ diff --git a/src/language/stats/friedman.c b/src/language/stats/friedman.c new file mode 100644 index 00000000..b000f269 --- /dev/null +++ b/src/language/stats/friedman.c @@ -0,0 +1,293 @@ +/* PSPP - a program for statistical analysis. -*-c-*- + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#include + +#include "friedman.h" + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + + +#include "gettext.h" +#define _(msgid) gettext (msgid) + + +struct friedman +{ + double *rank_sum; + double cc; + double chi_sq; + const struct dictionary *dict; +}; + +static void show_ranks_box (const struct one_sample_test *ost, + const struct friedman *fr); + +static void show_sig_box (const struct one_sample_test *ost, + const struct friedman *fr); + + +struct datum +{ + long posn; + double x; +}; + +static int +cmp_x (const void *a_, const void *b_) +{ + const struct datum *a = a_; + const struct datum *b = b_; + + if (a->x < b->x) + return -1; + + return (a->x > b->x); +} + +static int +cmp_posn (const void *a_, const void *b_) +{ + const struct datum *a = a_; + const struct datum *b = b_; + + if (a->posn < b->posn) + return -1; + + return (a->posn > b->posn); +} + +void +friedman_execute (const struct dataset *ds, + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool exact UNUSED, + double timer UNUSED) +{ + double numerator = 0.0; + double denominator = 0.0; + int v; + struct ccase *c; + const struct dictionary *dict = dataset_dict (ds); + const struct variable *weight = dict_get_weight (dict); + + struct one_sample_test *ft = UP_CAST (test, struct one_sample_test, parent); + bool warn = true; + + double sigma_t = 0.0; + struct datum *row = xcalloc (ft->n_vars, sizeof *row); + + struct friedman fr; + fr.rank_sum = xcalloc (ft->n_vars, sizeof *fr.rank_sum); + fr.cc = 0.0; + fr.dict = dict; + for (v = 0; v < ft->n_vars; ++v) + { + row[v].posn = v; + fr.rank_sum[v] = 0.0; + } + + input = casereader_create_filter_weight (input, dict, &warn, NULL); + for (; (c = casereader_read (input)); case_unref (c)) + { + double prev_x = SYSMIS; + int run_length = 0; + + const double w = weight ? case_data (c, weight)->f: 1.0; + + fr.cc += w; + + for (v = 0; v < ft->n_vars; ++v) + { + const struct variable *var = ft->vars[v]; + const union value *val = case_data (c, var); + row[v].x = val->f; + } + + qsort (row, ft->n_vars, sizeof *row, cmp_x); + for (v = 0; v < ft->n_vars; ++v) + { + double x = row[v].x; + /* Replace value by the Rank */ + if ( prev_x == x) + { + /* Deal with ties */ + int i; + run_length++; + for (i = v - run_length; i < v; ++i) + { + row[i].x *= run_length ; + row[i].x += v + 1; + row[i].x /= run_length + 1; + } + row[v].x = row[v-1].x; + } + else + { + row[v].x = v + 1; + if ( run_length > 0) + { + double t = run_length + 1; + sigma_t += pow3 (t) - t; + } + run_length = 0; + } + prev_x = x; + } + if ( run_length > 0) + { + double t = run_length + 1; + sigma_t += pow3 (t) - t; + } + + qsort (row, ft->n_vars, sizeof *row, cmp_posn); + + for (v = 0; v < ft->n_vars; ++v) + fr.rank_sum[v] += row[v].x; + + } + free (row); + + + for (v = 0; v < ft->n_vars; ++v) + { + numerator += pow2 (fr.rank_sum[v]); + } + + numerator *= 12.0 / (fr.cc * ft->n_vars * ( ft->n_vars + 1)); + numerator -= 3 * fr.cc * ( ft->n_vars + 1); + + denominator = 1 - sigma_t / ( fr.cc * ft->n_vars * ( pow2 (ft->n_vars) - 1)); + + fr.chi_sq = numerator / denominator; + + show_ranks_box (ft, &fr); + + show_sig_box (ft, &fr); + + free (fr.rank_sum); +} + + + +#include + +static void +show_ranks_box (const struct one_sample_test *ost, const struct friedman *fr) +{ + const struct variable *weight = dict_get_weight (fr->dict); + const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0; + + int i; + const int row_headers = 1; + const int column_headers = 1; + struct tab_table *table = + tab_create (row_headers + 1, column_headers + ost->n_vars); + + tab_headers (table, row_headers, 0, column_headers, 0); + + tab_title (table, _("Ranks")); + + /* Vertical lines inside the box */ + tab_box (table, 1, 0, -1, TAL_1, + row_headers, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); + + /* Box around the table */ + tab_box (table, TAL_2, TAL_2, -1, -1, + 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); + + + tab_text (table, 1, 0, 0, _("Mean Rank")); + + tab_hline (table, TAL_2, 0, tab_nc (table) - 1, column_headers); + tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); + + for (i = 0 ; i < ost->n_vars ; ++i) + { + tab_text (table, 0, row_headers + i, + TAB_LEFT, var_to_string (ost->vars[i])); + + tab_double (table, 1, row_headers + i, + 0, fr->rank_sum[i] / fr->cc, wfmt); + } + + tab_submit (table); +} + + +static void +show_sig_box (const struct one_sample_test *ost, const struct friedman *fr) +{ + const struct variable *weight = dict_get_weight (fr->dict); + const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0; + + int i; + const int row_headers = 1; + const int column_headers = 0; + struct tab_table *table = + tab_create (row_headers + 1, column_headers + 4); + + tab_headers (table, row_headers, 0, column_headers, 0); + + tab_title (table, _("Test Statistics")); + + tab_text (table, 0, column_headers, + TAT_TITLE | TAB_LEFT , _("N")); + + tab_text (table, 0, 1 + column_headers, + TAT_TITLE | TAB_LEFT , _("Chi-Square")); + + tab_text (table, 0, 2 + column_headers, + TAT_TITLE | TAB_LEFT, _("df")); + + tab_text (table, 0, 3 + column_headers, + TAT_TITLE | TAB_LEFT, _("Asymp. Sig.")); + + /* Box around the table */ + tab_box (table, TAL_2, TAL_2, -1, -1, + 0, 0, tab_nc (table) - 1, tab_nr (table) - 1 ); + + + tab_hline (table, TAL_2, 0, tab_nc (table) -1, column_headers); + tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1); + + tab_double (table, 1, column_headers, + 0, fr->cc, wfmt); + + tab_double (table, 1, column_headers + 1, + 0, fr->chi_sq, 0); + + tab_double (table, 1, column_headers + 2, + 0, ost->n_vars - 1, &F_8_0); + + tab_double (table, 1, column_headers + 3, + 0, gsl_cdf_chisq_Q (fr->chi_sq, ost->n_vars - 1), + 0); + + tab_submit (table); +} diff --git a/src/language/stats/friedman.h b/src/language/stats/friedman.h new file mode 100644 index 00000000..4d271d85 --- /dev/null +++ b/src/language/stats/friedman.h @@ -0,0 +1,34 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#if !friedman_h +#define friedman_h 1 + +#include +#include +#include + + + +void friedman_execute (const struct dataset *ds, + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool, + double); + + +#endif diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c index 73a36bda..1db0a1b7 100644 --- a/src/language/stats/npar.c +++ b/src/language/stats/npar.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,7 @@ struct cmd_npar_tests int wilcoxon; int sign; int runs; + int friedman; int kruskal_wallis; int missing; int method; @@ -116,8 +118,8 @@ struct npar_specs /* Prototype for custom subcommands of NPAR TESTS. */ static int npar_chisquare (struct lexer *, struct dataset *, struct npar_specs *); static int npar_binomial (struct lexer *, struct dataset *, struct npar_specs *); -static int npar_runs (struct lexer *lexer, struct dataset *, struct npar_specs *); - +static int npar_runs (struct lexer *, struct dataset *, struct npar_specs *); +static int npar_friedman (struct lexer *, struct dataset *, struct npar_specs *); static int npar_wilcoxon (struct lexer *, struct dataset *, struct npar_specs *); static int npar_sign (struct lexer *, struct dataset *, struct npar_specs *); static int npar_kruskal_wallis (struct lexer *, struct dataset *, struct npar_specs *); @@ -135,6 +137,7 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests npt->binomial = 0; npt->wilcoxon = 0; npt->runs = 0; + npt->friedman = 0; npt->sign = 0; npt->missing = 0; npt->miss = MISS_ANALYSIS; @@ -143,7 +146,23 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests memset (npt->a_statistics, 0, sizeof npt->a_statistics); for (;;) { - if (lex_match_hyphenated_word (lexer, "RUNS")) + if (lex_match_hyphenated_word (lexer, "FRIEDMAN")) + { + npt->friedman++; + switch (npar_friedman (lexer, ds, nps)) + { + case 0: + goto lossage; + case 1: + break; + case 2: + lex_error (lexer, NULL); + goto lossage; + default: + NOT_REACHED (); + } + } + else if (lex_match_hyphenated_word (lexer, "RUNS")) { npt->runs++; switch (npar_runs (lexer, ds, nps)) @@ -158,7 +177,6 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests default: NOT_REACHED (); } - } else if (lex_match_hyphenated_word (lexer, "CHISQUARE")) { @@ -540,6 +558,35 @@ npar_runs (struct lexer *lexer, struct dataset *ds, return 1; } +static int +npar_friedman (struct lexer *lexer, struct dataset *ds, + struct npar_specs *specs) +{ + struct one_sample_test *ft = pool_alloc (specs->pool, sizeof (*ft)); + struct npar_test *nt = &ft->parent; + + nt->execute = friedman_execute; + nt->insert_variables = one_sample_insert_variables; + + lex_match (lexer, '='); + + if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), + &ft->vars, &ft->n_vars, + PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC)) + { + return 2; + } + + specs->n_tests++; + specs->test = pool_realloc (specs->pool, + specs->test, + sizeof (*specs->test) * specs->n_tests); + + specs->test[specs->n_tests - 1] = nt; + + return 1; +} + static int npar_chisquare (struct lexer *lexer, struct dataset *ds, -- 2.30.2