X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fnpar.c;h=1db0a1b79be2c526aebaebbe81eb471c7376a0dc;hb=2c327bbf83aee01efec9a44157521888e34f81f5;hp=1b4af1a0488ba66fc151fd0b5def57a5d0278401;hpb=ee2ee85900cb6e917678ff8621da0c8ef275f12a;p=pspp diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c index 1b4af1a048..1db0a1b79b 100644 --- a/src/language/stats/npar.c +++ b/src/language/stats/npar.c @@ -23,6 +23,7 @@ #include #include "xalloc.h" + #include #include #include @@ -30,37 +31,35 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include #include #include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include "gettext.h" #define _(msgid) gettext (msgid) -struct dataset; /* Settings for subcommand specifiers. */ -enum - { - NPAR_ANALYSIS, - NPAR_LISTWISE, - }; - -enum +enum missing_type { - NPAR_INCLUDE, - NPAR_EXCLUDE + MISS_ANALYSIS, + MISS_LISTWISE, }; /* Array indices for STATISTICS subcommand. */ @@ -75,28 +74,23 @@ enum /* NPAR TESTS structure. */ struct cmd_npar_tests { - /* CHISQUARE subcommand. */ + /* Count variables indicating how many + of the subcommands have been given. */ int chisquare; - - /* BINOMIAL subcommand. */ int binomial; - - /* WILCOXON subcommand. */ int wilcoxon; - - /* SIGN subcommand. */ int sign; - - /* MISSING subcommand. */ + int runs; + int friedman; + int kruskal_wallis; int missing; - long miss; - long incl; - - /* METHOD subcommand. */ int method; - - /* STATISTICS subcommand. */ int statistics; + + /* How missing values should be treated */ + long miss; + + /* Which statistics have been requested */ int a_statistics[NPAR_ST_count]; }; @@ -124,8 +118,11 @@ struct npar_specs /* Prototype for custom subcommands of NPAR TESTS. */ static int npar_chisquare (struct lexer *, struct dataset *, struct npar_specs *); static int npar_binomial (struct lexer *, struct dataset *, struct npar_specs *); +static int npar_runs (struct lexer *, struct dataset *, struct npar_specs *); +static int npar_friedman (struct lexer *, struct dataset *, struct npar_specs *); static int npar_wilcoxon (struct lexer *, struct dataset *, struct npar_specs *); static int npar_sign (struct lexer *, struct dataset *, struct npar_specs *); +static int npar_kruskal_wallis (struct lexer *, struct dataset *, struct npar_specs *); static int npar_method (struct lexer *, struct npar_specs *); /* Command parsing functions. */ @@ -139,16 +136,49 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests npt->chisquare = 0; npt->binomial = 0; npt->wilcoxon = 0; + npt->runs = 0; + npt->friedman = 0; npt->sign = 0; npt->missing = 0; - npt->miss = NPAR_ANALYSIS; - npt->incl = NPAR_EXCLUDE; + npt->miss = MISS_ANALYSIS; npt->method = 0; npt->statistics = 0; memset (npt->a_statistics, 0, sizeof npt->a_statistics); for (;;) { - if (lex_match_hyphenated_word (lexer, "CHISQUARE")) + if (lex_match_hyphenated_word (lexer, "FRIEDMAN")) + { + npt->friedman++; + switch (npar_friedman (lexer, ds, nps)) + { + case 0: + goto lossage; + case 1: + break; + case 2: + lex_error (lexer, NULL); + goto lossage; + default: + NOT_REACHED (); + } + } + else if (lex_match_hyphenated_word (lexer, "RUNS")) + { + npt->runs++; + switch (npar_runs (lexer, ds, nps)) + { + case 0: + goto lossage; + case 1: + break; + case 2: + lex_error (lexer, NULL); + goto lossage; + default: + NOT_REACHED (); + } + } + else if (lex_match_hyphenated_word (lexer, "CHISQUARE")) { lex_match (lexer, '='); npt->chisquare++; @@ -182,6 +212,24 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } + else if (lex_match_hyphenated_word (lexer, "K-W") || + lex_match_hyphenated_word (lexer, "KRUSKAL-WALLIS")) + { + lex_match (lexer, '='); + npt->kruskal_wallis++; + switch (npar_kruskal_wallis (lexer, ds, nps)) + { + case 0: + goto lossage; + case 1: + break; + case 2: + lex_error (lexer, NULL); + goto lossage; + default: + NOT_REACHED (); + } + } else if (lex_match_hyphenated_word (lexer, "WILCOXON")) { lex_match (lexer, '='); @@ -222,19 +270,19 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests npt->missing++; if (npt->missing > 1) { - msg (SE, _ ("MISSING subcommand may be given only once.")); + msg (SE, _("The %s subcommand may be given only once."), "MISSING"); goto lossage; } while (lex_token (lexer) != '/' && lex_token (lexer) != '.') { if (lex_match_hyphenated_word (lexer, "ANALYSIS")) - npt->miss = NPAR_ANALYSIS; + npt->miss = MISS_ANALYSIS; else if (lex_match_hyphenated_word (lexer, "LISTWISE")) - npt->miss = NPAR_LISTWISE; + npt->miss = MISS_LISTWISE; else if (lex_match_hyphenated_word (lexer, "INCLUDE")) - npt->incl = NPAR_INCLUDE; + nps->filter = MV_SYSTEM; else if (lex_match_hyphenated_word (lexer, "EXCLUDE")) - npt->incl = NPAR_EXCLUDE; + nps->filter = MV_ANY; else { lex_error (lexer, NULL); @@ -249,7 +297,7 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests npt->method++; if (npt->method > 1) { - msg (SE, _ ("METHOD subcommand may be given only once.")); + msg (SE, _("The %s subcommand may be given only once."), "METHOD"); goto lossage; } switch (npar_method (lexer, nps)) @@ -299,7 +347,7 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests if (lex_token (lexer) != '.') { - lex_error (lexer, _ ("expecting end of command")); + lex_error (lexer, _("expecting end of command")); goto lossage; } @@ -310,14 +358,14 @@ lossage: } - - static void one_sample_insert_variables (const struct npar_test *test, - struct const_hsh_table *variables); + struct hmapx *); static void two_sample_insert_variables (const struct npar_test *test, - struct const_hsh_table *variables); + struct hmapx *); +static void n_sample_insert_variables (const struct npar_test *test, + struct hmapx *); static void npar_execute (struct casereader *input, @@ -332,7 +380,7 @@ npar_execute (struct casereader *input, const struct npar_test *test = specs->test[t]; if ( NULL == test->execute ) { - msg (SW, _ ("NPAR subcommand not currently implemented.")); + msg (SW, _("NPAR subcommand not currently implemented.")); continue; } test->execute (ds, casereader_clone (input), specs->filter, test, specs->exact, specs->timer); @@ -358,7 +406,6 @@ npar_execute (struct casereader *input, casereader_destroy (input); } - int cmd_npar_tests (struct lexer *lexer, struct dataset *ds) { @@ -366,15 +413,15 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) bool ok; int i; struct npar_specs npar_specs = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - struct const_hsh_table *var_hash; struct casegrouper *grouper; struct casereader *input, *group; + struct hmapx var_map = HMAPX_INITIALIZER (var_map); - npar_specs.pool = pool_create (); - var_hash = const_hsh_create_pool (npar_specs.pool, 0, - compare_vars_by_name, hash_var_by_name, - NULL, NULL); + npar_specs.pool = pool_create (); + npar_specs.filter = MV_ANY; + npar_specs.n_vars = -1; + npar_specs.vv = NULL; if ( ! parse_npar_tests (lexer, ds, &cmd, &npar_specs) ) { @@ -385,11 +432,24 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) for (i = 0; i < npar_specs.n_tests; ++i ) { const struct npar_test *test = npar_specs.test[i]; - test->insert_variables (test, var_hash); + test->insert_variables (test, &var_map); } - npar_specs.vv = (const struct variable **) const_hsh_sort (var_hash); - npar_specs.n_vars = const_hsh_count (var_hash); + { + struct hmapx_node *node; + struct variable *var; + npar_specs.n_vars = 0; + + HMAPX_FOR_EACH (var, node, &var_map) + { + npar_specs.n_vars ++; + npar_specs.vv = pool_nrealloc (npar_specs.pool, npar_specs.vv, npar_specs.n_vars, sizeof (*npar_specs.vv)); + npar_specs.vv[npar_specs.n_vars - 1] = var; + } + } + + qsort (npar_specs.vv, npar_specs.n_vars, sizeof (*npar_specs.vv), + compare_var_ptrs_by_name); if ( cmd.statistics ) { @@ -418,10 +478,8 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) } } - npar_specs.filter = cmd.incl == NPAR_EXCLUDE ? MV_ANY : MV_SYSTEM; - input = proc_open (ds); - if ( cmd.miss == NPAR_LISTWISE ) + if ( cmd.miss == MISS_LISTWISE ) { input = casereader_create_filter_missing (input, npar_specs.vv, @@ -437,13 +495,99 @@ cmd_npar_tests (struct lexer *lexer, struct dataset *ds) ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; - const_hsh_destroy (var_hash); - pool_destroy (npar_specs.pool); + hmapx_destroy (&var_map); return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; } +static int +npar_runs (struct lexer *lexer, struct dataset *ds, + struct npar_specs *specs) +{ + struct runs_test *rt = pool_alloc (specs->pool, sizeof (*rt)); + struct one_sample_test *tp = &rt->parent; + struct npar_test *nt = &tp->parent; + + nt->execute = runs_execute; + nt->insert_variables = one_sample_insert_variables; + + if ( lex_force_match (lexer, '(') ) + { + if ( lex_match_id (lexer, "MEAN")) + { + rt->cp_mode = CP_MEAN; + } + else if (lex_match_id (lexer, "MEDIAN")) + { + rt->cp_mode = CP_MEDIAN; + } + else if (lex_match_id (lexer, "MODE")) + { + rt->cp_mode = CP_MODE; + } + else if (lex_is_number (lexer)) + { + rt->cutpoint = lex_number (lexer); + rt->cp_mode = CP_CUSTOM; + lex_get (lexer); + } + else + { + lex_error (lexer, _("Expecting MEAN, MEDIAN, MODE or number")); + return 0; + } + + lex_force_match (lexer, ')'); + lex_force_match (lexer, '='); + if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), + &tp->vars, &tp->n_vars, + PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC)) + { + return 2; + } + } + + specs->n_tests++; + specs->test = pool_realloc (specs->pool, + specs->test, + sizeof (*specs->test) * specs->n_tests); + + specs->test[specs->n_tests - 1] = nt; + + return 1; +} + +static int +npar_friedman (struct lexer *lexer, struct dataset *ds, + struct npar_specs *specs) +{ + struct one_sample_test *ft = pool_alloc (specs->pool, sizeof (*ft)); + struct npar_test *nt = &ft->parent; + + nt->execute = friedman_execute; + nt->insert_variables = one_sample_insert_variables; + + lex_match (lexer, '='); + + if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), + &ft->vars, &ft->n_vars, + PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC)) + { + return 2; + } + + specs->n_tests++; + specs->test = pool_realloc (specs->pool, + specs->test, + sizeof (*specs->test) * specs->n_tests); + + specs->test[specs->n_tests - 1] = nt; + + return 1; +} + + static int npar_chisquare (struct lexer *lexer, struct dataset *ds, struct npar_specs *specs) @@ -452,6 +596,7 @@ npar_chisquare (struct lexer *lexer, struct dataset *ds, struct one_sample_test *tp = &cstp->parent; struct npar_test *nt = &tp->parent; + nt->execute = chisquare_execute; nt->insert_variables = one_sample_insert_variables; @@ -476,7 +621,7 @@ npar_chisquare (struct lexer *lexer, struct dataset *ds, if ( cstp->lo >= cstp->hi ) { msg (ME, - _ ("The specified value of HI (%d) is " + _("The specified value of HI (%d) is " "lower than the specified value of LO (%d)"), cstp->hi, cstp->lo); return 0; @@ -531,7 +676,7 @@ npar_chisquare (struct lexer *lexer, struct dataset *ds, cstp->n_expected != cstp->hi - cstp->lo + 1 ) { msg (ME, - _ ("%d expected values were given, but the specified " + _("%d expected values were given, but the specified " "range (%d-%d) requires exactly %d values."), cstp->n_expected, cstp->lo, cstp->hi, cstp->hi - cstp->lo +1); @@ -670,7 +815,7 @@ parse_two_sample_related_test (struct lexer *lexer, if (paired) { if ( n_vlist1 != n_vlist2) - msg (SE, _ ("PAIRED was specified but the number of variables " + msg (SE, _("PAIRED was specified but the number of variables " "preceding WITH (%zu) did not match the number " "following (%zu)."), n_vlist1, n_vlist2); @@ -736,6 +881,51 @@ parse_two_sample_related_test (struct lexer *lexer, return true; } + +static bool +parse_n_sample_related_test (struct lexer *lexer, + const struct dictionary *dict, + struct n_sample_test *nst, + struct pool *pool + ) +{ + if (!parse_variables_const_pool (lexer, pool, + dict, + &nst->vars, &nst->n_vars, + PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) + return false; + + if ( ! lex_force_match (lexer, T_BY)) + return false; + + nst->indep_var = parse_variable_const (lexer, dict); + + if ( ! lex_force_match (lexer, '(')) + return false; + + value_init (&nst->val1, var_get_width (nst->indep_var)); + if ( ! parse_value (lexer, &nst->val1, var_get_width (nst->indep_var))) + { + value_destroy (&nst->val1, var_get_width (nst->indep_var)); + return false; + } + + if ( ! lex_force_match (lexer, ',')) + return false; + + value_init (&nst->val2, var_get_width (nst->indep_var)); + if ( ! parse_value (lexer, &nst->val2, var_get_width (nst->indep_var))) + { + value_destroy (&nst->val2, var_get_width (nst->indep_var)); + return false; + } + + if ( ! lex_force_match (lexer, ')')) + return false; + + return true; +} + static int npar_wilcoxon (struct lexer *lexer, struct dataset *ds, @@ -782,35 +972,89 @@ npar_sign (struct lexer *lexer, struct dataset *ds, return 1; } -/* Insert the variables for TEST into VAR_HASH */ +static int +npar_kruskal_wallis (struct lexer *lexer, struct dataset *ds, + struct npar_specs *specs) +{ + struct n_sample_test *tp = pool_alloc (specs->pool, sizeof (*tp)); + struct npar_test *nt = &tp->parent; + + nt->insert_variables = n_sample_insert_variables; + + nt->execute = kruskal_wallis_execute; + + if (!parse_n_sample_related_test (lexer, dataset_dict (ds), + tp, specs->pool) ) + return 0; + + specs->n_tests++; + specs->test = pool_realloc (specs->pool, + specs->test, + sizeof (*specs->test) * specs->n_tests); + specs->test[specs->n_tests - 1] = nt; + + return 1; +} + +static void +insert_variable_into_map (struct hmapx *var_map, const struct variable *var) +{ + size_t hash = hash_pointer (var, 0); + struct hmapx_node *node; + const struct variable *v = NULL; + + HMAPX_FOR_EACH_WITH_HASH (v, node, hash, var_map) + { + if ( v == var) + return ; + } + + hmapx_insert (var_map, CONST_CAST (struct variable *, var), hash); +} + +/* Insert the variables for TEST into VAR_MAP */ static void one_sample_insert_variables (const struct npar_test *test, - struct const_hsh_table *var_hash) + struct hmapx *var_map) { int i; - struct one_sample_test *ost = UP_CAST (test, struct one_sample_test, parent); + const struct one_sample_test *ost = UP_CAST (test, const struct one_sample_test, parent); for ( i = 0 ; i < ost->n_vars ; ++i ) - const_hsh_insert (var_hash, ost->vars[i]); + insert_variable_into_map (var_map, ost->vars[i]); } + static void two_sample_insert_variables (const struct npar_test *test, - struct const_hsh_table *var_hash) + struct hmapx *var_map) { int i; - - const struct two_sample_test *tst = (const struct two_sample_test *) test; + const struct two_sample_test *tst = UP_CAST (test, const struct two_sample_test, parent); for ( i = 0 ; i < tst->n_pairs ; ++i ) { variable_pair *pair = &tst->pairs[i]; - const_hsh_insert (var_hash, (*pair)[0]); - const_hsh_insert (var_hash, (*pair)[1]); + insert_variable_into_map (var_map, (*pair)[0]); + insert_variable_into_map (var_map, (*pair)[1]); } } +static void +n_sample_insert_variables (const struct npar_test *test, + struct hmapx *var_map) +{ + int i; + const struct n_sample_test *tst = UP_CAST (test, const struct n_sample_test, parent); + + for ( i = 0 ; i < tst->n_vars ; ++i ) + insert_variable_into_map (var_map, tst->vars[i]); + + insert_variable_into_map (var_map, tst->indep_var); +} + + static int npar_method (struct lexer *lexer, struct npar_specs *specs) {