X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fnpar.c;h=a572e09f56cbbcccc2b0bba90dc150db5f239cf7;hb=9ade26c8349b4434008c46cf09bc7473ec743972;hp=6bac48de913974ef42635aa78d7c4e2a427dc684;hpb=257609763a5728ce3d18d6a31d2a15b6e1dba4ed;p=pspp-builds.git diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c index 6bac48de..a572e09f 100644 --- a/src/language/stats/npar.c +++ b/src/language/stats/npar.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. -*-c-*- - Copyright (C) 2006, 2008, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,44 +16,44 @@ #include -#include -#include "npar-summary.h" +#include "language/stats/npar.h" #include #include -#include "xalloc.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "data/case.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/dictionary.h" +#include "data/procedure.h" +#include "data/settings.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/lexer/lexer.h" +#include "language/lexer/value-parser.h" +#include "language/lexer/variable-parser.h" +#include "language/stats/binomial.h" +#include "language/stats/chisquare.h" +#include "language/stats/cochran.h" +#include "language/stats/friedman.h" +#include "language/stats/kruskal-wallis.h" +#include "language/stats/mann-whitney.h" +#include "language/stats/npar-summary.h" +#include "language/stats/runs.h" +#include "language/stats/sign.h" +#include "language/stats/wilcoxon.h" +#include "libpspp/array.h" +#include "libpspp/assertion.h" +#include "libpspp/cast.h" +#include "libpspp/hash-functions.h" +#include "libpspp/hmapx.h" +#include "libpspp/message.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" +#include "libpspp/taint.h" +#include "math/moments.h" + +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -86,6 +86,7 @@ struct cmd_npar_tests int sign; int runs; int friedman; + int kendall; int kruskal_wallis; int mann_whitney; int missing; @@ -125,6 +126,7 @@ static int npar_chisquare (struct lexer *, struct dataset *, struct npar_specs * static int npar_binomial (struct lexer *, struct dataset *, struct npar_specs *); static int npar_runs (struct lexer *, struct dataset *, struct npar_specs *); static int npar_friedman (struct lexer *, struct dataset *, struct npar_specs *); +static int npar_kendall (struct lexer *, struct dataset *, struct npar_specs *); static int npar_cochran (struct lexer *, struct dataset *, struct npar_specs *); static int npar_wilcoxon (struct lexer *, struct dataset *, struct npar_specs *); static int npar_sign (struct lexer *, struct dataset *, struct npar_specs *); @@ -156,7 +158,7 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests memset (npt->a_statistics, 0, sizeof npt->a_statistics); for (;;) { - if (lex_match_hyphenated_word (lexer, "COCHRAN")) + if (lex_match_id (lexer, "COCHRAN")) { npt->cochran++; switch (npar_cochran (lexer, ds, nps)) @@ -172,7 +174,7 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "FRIEDMAN")) + else if (lex_match_id (lexer, "FRIEDMAN")) { npt->friedman++; switch (npar_friedman (lexer, ds, nps)) @@ -188,7 +190,23 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "RUNS")) + else if (lex_match_id (lexer, "KENDALL")) + { + npt->kendall++; + switch (npar_kendall (lexer, ds, nps)) + { + case 0: + goto lossage; + case 1: + break; + case 2: + lex_error (lexer, NULL); + goto lossage; + default: + NOT_REACHED (); + } + } + else if (lex_match_id (lexer, "RUNS")) { npt->runs++; switch (npar_runs (lexer, ds, nps)) @@ -204,9 +222,9 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "CHISQUARE")) + else if (lex_match_id (lexer, "CHISQUARE")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->chisquare++; switch (npar_chisquare (lexer, ds, nps)) { @@ -217,13 +235,15 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests case 2: lex_error (lexer, NULL); goto lossage; + case 3: + continue; default: NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "BINOMIAL")) + else if (lex_match_id (lexer, "BINOMIAL")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->binomial++; switch (npar_binomial (lexer, ds, nps)) { @@ -238,10 +258,10 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "K-W") || - lex_match_hyphenated_word (lexer, "KRUSKAL-WALLIS")) + else if (lex_match_phrase (lexer, "K-W") || + lex_match_phrase (lexer, "KRUSKAL-WALLIS")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->kruskal_wallis++; switch (npar_kruskal_wallis (lexer, ds, nps)) { @@ -256,10 +276,10 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "M-W") || - lex_match_hyphenated_word (lexer, "MANN-WHITNEY")) + else if (lex_match_phrase (lexer, "M-W") || + lex_match_phrase (lexer, "MANN-WHITNEY")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->mann_whitney++; switch (npar_mann_whitney (lexer, ds, nps)) { @@ -274,9 +294,9 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "WILCOXON")) + else if (lex_match_id (lexer, "WILCOXON")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->wilcoxon++; switch (npar_wilcoxon (lexer, ds, nps)) { @@ -291,9 +311,9 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "SIGN")) + else if (lex_match_id (lexer, "SIGN")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->sign++; switch (npar_sign (lexer, ds, nps)) { @@ -308,36 +328,36 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "MISSING")) + else if (lex_match_id (lexer, "MISSING")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->missing++; if (npt->missing > 1) { msg (SE, _("The %s subcommand may be given only once."), "MISSING"); goto lossage; } - while (lex_token (lexer) != '/' && lex_token (lexer) != '.') + while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) { - if (lex_match_hyphenated_word (lexer, "ANALYSIS")) + if (lex_match_id (lexer, "ANALYSIS")) npt->miss = MISS_ANALYSIS; - else if (lex_match_hyphenated_word (lexer, "LISTWISE")) + else if (lex_match_id (lexer, "LISTWISE")) npt->miss = MISS_LISTWISE; - else if (lex_match_hyphenated_word (lexer, "INCLUDE")) + else if (lex_match_id (lexer, "INCLUDE")) nps->filter = MV_SYSTEM; - else if (lex_match_hyphenated_word (lexer, "EXCLUDE")) + else if (lex_match_id (lexer, "EXCLUDE")) nps->filter = MV_ANY; else { lex_error (lexer, NULL); goto lossage; } - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); } } - else if (lex_match_hyphenated_word (lexer, "METHOD")) + else if (lex_match_id (lexer, "METHOD")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->method++; if (npt->method > 1) { @@ -357,15 +377,15 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } - else if (lex_match_hyphenated_word (lexer, "STATISTICS")) + else if (lex_match_id (lexer, "STATISTICS")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); npt->statistics++; - while (lex_token (lexer) != '/' && lex_token (lexer) != '.') + while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD) { - if (lex_match_hyphenated_word (lexer, "DESCRIPTIVES")) + if (lex_match_id (lexer, "DESCRIPTIVES")) npt->a_statistics[NPAR_ST_DESCRIPTIVES] = 1; - else if (lex_match_hyphenated_word (lexer, "QUARTILES")) + else if (lex_match_id (lexer, "QUARTILES")) npt->a_statistics[NPAR_ST_QUARTILES] = 1; else if (lex_match (lexer, T_ALL)) npt->a_statistics[NPAR_ST_ALL] = 1; @@ -374,22 +394,22 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests lex_error (lexer, NULL); goto lossage; } - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); } } else if ( settings_get_syntax () != COMPATIBLE && lex_match_id (lexer, "ALGORITHM")) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "COMPATIBLE")) settings_set_cmd_algorithm (COMPATIBLE); else if (lex_match_id (lexer, "ENHANCED")) settings_set_cmd_algorithm (ENHANCED); } - if (!lex_match (lexer, '/')) + if (!lex_match (lexer, T_SLASH)) break; } - if (lex_token (lexer) != '.') + if (lex_token (lexer) != T_ENDCMD) { lex_error (lexer, _("expecting end of command")); goto lossage; @@ -556,7 +576,7 @@ npar_runs (struct lexer *lexer, struct dataset *ds, nt->execute = runs_execute; nt->insert_variables = one_sample_insert_variables; - if ( lex_force_match (lexer, '(') ) + if ( lex_force_match (lexer, T_LPAREN) ) { if ( lex_match_id (lexer, "MEAN")) { @@ -582,8 +602,8 @@ npar_runs (struct lexer *lexer, struct dataset *ds, return 0; } - lex_force_match (lexer, ')'); - lex_force_match (lexer, '='); + lex_force_match (lexer, T_RPAREN); + lex_force_match (lexer, T_EQUALS); if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), &tp->vars, &tp->n_vars, PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC)) @@ -606,16 +626,18 @@ static int npar_friedman (struct lexer *lexer, struct dataset *ds, struct npar_specs *specs) { - struct one_sample_test *ft = pool_alloc (specs->pool, sizeof (*ft)); - struct npar_test *nt = &ft->parent; + struct friedman_test *ft = pool_alloc (specs->pool, sizeof (*ft)); + struct one_sample_test *ost = &ft->parent; + struct npar_test *nt = &ost->parent; + ft->kendalls_w = false; nt->execute = friedman_execute; nt->insert_variables = one_sample_insert_variables; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), - &ft->vars, &ft->n_vars, + &ost->vars, &ost->n_vars, PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC)) { return 2; @@ -631,6 +653,38 @@ npar_friedman (struct lexer *lexer, struct dataset *ds, return 1; } +static int +npar_kendall (struct lexer *lexer, struct dataset *ds, + struct npar_specs *specs) +{ + struct friedman_test *kt = pool_alloc (specs->pool, sizeof (*kt)); + struct one_sample_test *ost = &kt->parent; + struct npar_test *nt = &ost->parent; + + kt->kendalls_w = true; + nt->execute = friedman_execute; + nt->insert_variables = one_sample_insert_variables; + + lex_match (lexer, T_EQUALS); + + if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), + &ost->vars, &ost->n_vars, + PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC)) + { + return 2; + } + + specs->n_tests++; + specs->test = pool_realloc (specs->pool, + specs->test, + sizeof (*specs->test) * specs->n_tests); + + specs->test[specs->n_tests - 1] = nt; + + return 1; +} + + static int npar_cochran (struct lexer *lexer, struct dataset *ds, struct npar_specs *specs) @@ -641,7 +695,7 @@ npar_cochran (struct lexer *lexer, struct dataset *ds, nt->execute = cochran_execute; nt->insert_variables = one_sample_insert_variables; - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), &ft->vars, &ft->n_vars, @@ -668,7 +722,7 @@ npar_chisquare (struct lexer *lexer, struct dataset *ds, struct chisquare_test *cstp = pool_alloc (specs->pool, sizeof (*cstp)); struct one_sample_test *tp = &cstp->parent; struct npar_test *nt = &tp->parent; - + int retval = 1; nt->execute = chisquare_execute; nt->insert_variables = one_sample_insert_variables; @@ -682,13 +736,13 @@ npar_chisquare (struct lexer *lexer, struct dataset *ds, cstp->ranged = false; - if ( lex_match (lexer, '(')) + if ( lex_match (lexer, T_LPAREN)) { cstp->ranged = true; if ( ! lex_force_num (lexer)) return 0; cstp->lo = lex_integer (lexer); lex_get (lexer); - lex_force_match (lexer, ','); + lex_force_match (lexer, T_COMMA); if (! lex_force_num (lexer) ) return 0; cstp->hi = lex_integer (lexer); if ( cstp->lo >= cstp->hi ) @@ -700,49 +754,44 @@ npar_chisquare (struct lexer *lexer, struct dataset *ds, return 0; } lex_get (lexer); - if (! lex_force_match (lexer, ')')) return 0; + if (! lex_force_match (lexer, T_RPAREN)) return 0; } cstp->n_expected = 0; cstp->expected = NULL; - if ( lex_match (lexer, '/') ) + if (lex_match_phrase (lexer, "/EXPECTED")) { - if ( lex_match_id (lexer, "EXPECTED") ) - { - lex_force_match (lexer, '='); - if ( ! lex_match_id (lexer, "EQUAL") ) - { - double f; - int n; - while ( lex_is_number (lexer) ) - { - int i; - n = 1; - f = lex_number (lexer); - lex_get (lexer); - if ( lex_match (lexer, '*')) - { - n = f; - f = lex_number (lexer); - lex_get (lexer); - } - lex_match (lexer, ','); - - cstp->n_expected += n; - cstp->expected = pool_realloc (specs->pool, - cstp->expected, - sizeof (double) * - cstp->n_expected); - for ( i = cstp->n_expected - n ; - i < cstp->n_expected; - ++i ) - cstp->expected[i] = f; + lex_force_match (lexer, T_EQUALS); + if ( ! lex_match_id (lexer, "EQUAL") ) + { + double f; + int n; + while ( lex_is_number (lexer) ) + { + int i; + n = 1; + f = lex_number (lexer); + lex_get (lexer); + if ( lex_match (lexer, T_ASTERISK)) + { + n = f; + f = lex_number (lexer); + lex_get (lexer); + } + lex_match (lexer, T_COMMA); + + cstp->n_expected += n; + cstp->expected = pool_realloc (specs->pool, + cstp->expected, + sizeof (double) * + cstp->n_expected); + for ( i = cstp->n_expected - n ; + i < cstp->n_expected; + ++i ) + cstp->expected[i] = f; - } - } - } - else - lex_put_back (lexer, '/'); + } + } } if ( cstp->ranged && cstp->n_expected > 0 && @@ -763,7 +812,7 @@ npar_chisquare (struct lexer *lexer, struct dataset *ds, specs->test[specs->n_tests - 1] = nt; - return 1; + return retval; } @@ -774,6 +823,7 @@ npar_binomial (struct lexer *lexer, struct dataset *ds, struct binomial_test *btp = pool_alloc (specs->pool, sizeof (*btp)); struct one_sample_test *tp = &btp->parent; struct npar_test *nt = &tp->parent; + bool equals = false; nt->execute = binomial_execute; nt->insert_variables = one_sample_insert_variables; @@ -782,33 +832,33 @@ npar_binomial (struct lexer *lexer, struct dataset *ds, btp->p = 0.5; - if ( lex_match (lexer, '(') ) + if ( lex_match (lexer, T_LPAREN) ) { + equals = false; if ( lex_force_num (lexer) ) { btp->p = lex_number (lexer); lex_get (lexer); - lex_force_match (lexer, ')'); + lex_force_match (lexer, T_RPAREN); } else return 0; } else - /* Kludge: q2c swallows the '=' so put it back here */ - lex_put_back (lexer, '='); + equals = true; - if (lex_match (lexer, '=') ) + if (equals || lex_match (lexer, T_EQUALS) ) { if (parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), &tp->vars, &tp->n_vars, PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) { - if (lex_match (lexer, '(')) + if (lex_match (lexer, T_LPAREN)) { lex_force_num (lexer); btp->category1 = lex_number (lexer); lex_get (lexer); - if ( lex_match (lexer, ',')) + if ( lex_match (lexer, T_COMMA)) { if ( ! lex_force_num (lexer) ) return 2; btp->category2 = lex_number (lexer); @@ -819,7 +869,7 @@ npar_binomial (struct lexer *lexer, struct dataset *ds, btp->cutpoint = btp->category1; } - lex_force_match (lexer, ')'); + lex_force_match (lexer, T_RPAREN); } } else @@ -878,8 +928,8 @@ parse_two_sample_related_test (struct lexer *lexer, PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) return false; - paired = (lex_match (lexer, '(') && - lex_match_id (lexer, "PAIRED") && lex_match (lexer, ')')); + paired = (lex_match (lexer, T_LPAREN) && + lex_match_id (lexer, "PAIRED") && lex_match (lexer, T_RPAREN)); } @@ -973,7 +1023,7 @@ parse_n_sample_related_test (struct lexer *lexer, nst->indep_var = parse_variable_const (lexer, dict); - if ( ! lex_force_match (lexer, '(')) + if ( ! lex_force_match (lexer, T_LPAREN)) return false; value_init (&nst->val1, var_get_width (nst->indep_var)); @@ -983,7 +1033,7 @@ parse_n_sample_related_test (struct lexer *lexer, return false; } - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); value_init (&nst->val2, var_get_width (nst->indep_var)); if ( ! parse_value (lexer, &nst->val2, var_get_width (nst->indep_var))) @@ -992,7 +1042,7 @@ parse_n_sample_related_test (struct lexer *lexer, return false; } - if ( ! lex_force_match (lexer, ')')) + if ( ! lex_force_match (lexer, T_RPAREN)) return false; return true; @@ -1164,14 +1214,14 @@ npar_method (struct lexer *lexer, struct npar_specs *specs) { specs->timer = 5.0; - if ( lex_match (lexer, '(')) + if ( lex_match (lexer, T_LPAREN)) { if ( lex_force_num (lexer) ) { specs->timer = lex_number (lexer); lex_get (lexer); } - lex_force_match (lexer, ')'); + lex_force_match (lexer, T_RPAREN); } } }