+++ /dev/null
-/* PSPP - a program for statistical analysis. -*-c-*-
- Copyright (C) 2006, 2008, 2009, 2010, 2011, 2016 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-#include <config.h>
-
-#include "language/stats/npar.h"
-
-#include <limits.h>
-#include <math.h>
-#include <stdlib.h>
-
-#include "data/case.h"
-#include "data/casegrouper.h"
-#include "data/casereader.h"
-#include "data/dataset.h"
-#include "data/dictionary.h"
-#include "data/settings.h"
-#include "data/variable.h"
-#include "language/command.h"
-#include "language/lexer/lexer.h"
-#include "language/lexer/value-parser.h"
-#include "language/lexer/variable-parser.h"
-#include "language/stats/binomial.h"
-#include "language/stats/chisquare.h"
-#include "language/stats/ks-one-sample.h"
-#include "language/stats/cochran.h"
-#include "language/stats/friedman.h"
-#include "language/stats/jonckheere-terpstra.h"
-#include "language/stats/kruskal-wallis.h"
-#include "language/stats/mann-whitney.h"
-#include "language/stats/mcnemar.h"
-#include "language/stats/median.h"
-#include "language/stats/npar-summary.h"
-#include "language/stats/runs.h"
-#include "language/stats/sign.h"
-#include "language/stats/wilcoxon.h"
-#include "libpspp/array.h"
-#include "libpspp/assertion.h"
-#include "libpspp/cast.h"
-#include "libpspp/hash-functions.h"
-#include "libpspp/hmapx.h"
-#include "libpspp/message.h"
-#include "libpspp/pool.h"
-#include "libpspp/str.h"
-#include "libpspp/taint.h"
-#include "math/moments.h"
-
-#include "gl/xalloc.h"
-
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
-
-/* NPAR TESTS structure: everything parsed from one NPAR TESTS command,
-   i.e. the tests that were requested plus the options shared by all
-   of them. */
-struct npar_specs
-{
- struct pool *pool; /* Pool that the tests and variable lists are allocated from. */
- struct npar_test **test; /* Tests to run, in the order they were added. */
- size_t n_tests; /* Number of elements in test. */
-
- const struct variable **vv; /* Compendium of all variables
- (those mentioned on ANY subcommand). */
- int n_vars; /* Number of variables in vv. */
-
- enum mv_class filter; /* Missing values to filter. */
- bool listwise_missing; /* If true, cases are filtered for missing values across all of vv before any test runs. */
-
- bool descriptives; /* Descriptive statistics should be calculated. */
- bool quartiles; /* Quartiles should be calculated. */
-
- bool exact; /* Whether exact calculations have been requested. */
- double timer; /* Maximum time (in minutes) to wait for exact calculations. */
-};
-
-
-/* Prototypes for the parsers of the individual NPAR TESTS subcommands. */
-static bool npar_chisquare (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_binomial (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_ks_one_sample (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_runs (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_friedman (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_kendall (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_cochran (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_wilcoxon (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_sign (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_kruskal_wallis (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_jonckheere_terpstra (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_mann_whitney (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_mcnemar (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_median (struct lexer *, struct dataset *, struct npar_specs *);
-static bool npar_method (struct lexer *, struct npar_specs *);
-
-/* Command parsing functions. */
-
-/* Parses the keywords of the MISSING subcommand, up to the next "/" or the
-   end of the command, and records them in NPS.  Returns false, after
-   reporting an error, if an unknown keyword is found. */
-static bool
-npar_parse_missing_keywords (struct lexer *lexer, struct npar_specs *nps)
-{
-  while (!(lex_token (lexer) == T_SLASH || lex_token (lexer) == T_ENDCMD))
-    {
-      if (lex_match_id (lexer, "ANALYSIS"))
-        nps->listwise_missing = false;
-      else if (lex_match_id (lexer, "LISTWISE"))
-        nps->listwise_missing = true;
-      else if (lex_match_id (lexer, "INCLUDE"))
-        nps->filter = MV_SYSTEM;
-      else if (lex_match_id (lexer, "EXCLUDE"))
-        nps->filter = MV_ANY;
-      else
-        {
-          lex_error_expecting (lexer, "ANALYSIS", "LISTWISE",
-                               "INCLUDE", "EXCLUDE");
-          return false;
-        }
-
-      /* Keywords may optionally be separated by commas. */
-      lex_match (lexer, T_COMMA);
-    }
-  return true;
-}
-
-/* Parses the keywords of the STATISTICS subcommand, up to the next "/" or
-   the end of the command, and records them in NPS.  Returns false, after
-   reporting an error, if an unknown keyword is found. */
-static bool
-npar_parse_statistics_keywords (struct lexer *lexer, struct npar_specs *nps)
-{
-  while (!(lex_token (lexer) == T_SLASH || lex_token (lexer) == T_ENDCMD))
-    {
-      if (lex_match_id (lexer, "DESCRIPTIVES"))
-        nps->descriptives = true;
-      else if (lex_match_id (lexer, "QUARTILES"))
-        nps->quartiles = true;
-      else if (lex_match (lexer, T_ALL))
-        nps->descriptives = nps->quartiles = true;
-      else
-        {
-          lex_error_expecting (lexer, "DESCRIPTIVES", "QUARTILES",
-                               "ALL");
-          return false;
-        }
-
-      lex_match (lexer, T_COMMA);
-    }
-  return true;
-}
-
-/* Parses the "/"-separated subcommands of NPAR TESTS from LEXER into NPS.
-   Each test subcommand is handed to its own parser, which appends a test
-   to NPS; MISSING, METHOD, STATISTICS and ALGORITHM set options instead.
-   MISSING and METHOD may appear at most once.
-
-   Returns true if every subcommand parsed, false as soon as one fails. */
-static int
-parse_npar_tests (struct lexer *lexer, struct dataset *ds,
-                  struct npar_specs *nps)
-{
-  bool have_missing = false;
-  bool have_method = false;
-
-  /* A leading slash before the first subcommand is optional. */
-  lex_match (lexer, T_SLASH);
-  do
-    {
-      bool ok;
-
-      if (lex_match_id (lexer, "COCHRAN"))
-        ok = npar_cochran (lexer, ds, nps);
-      else if (lex_match_id (lexer, "FRIEDMAN"))
-        ok = npar_friedman (lexer, ds, nps);
-      else if (lex_match_id (lexer, "KENDALL"))
-        ok = npar_kendall (lexer, ds, nps);
-      else if (lex_match_id (lexer, "RUNS"))
-        ok = npar_runs (lexer, ds, nps);
-      else if (lex_match_id (lexer, "CHISQUARE"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_chisquare (lexer, ds, nps);
-        }
-      else if (lex_match_id (lexer, "BINOMIAL"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_binomial (lexer, ds, nps);
-        }
-      else if (lex_match_phrase (lexer, "K-S")
-               || lex_match_phrase (lexer, "KOLMOGOROV-SMIRNOV"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_ks_one_sample (lexer, ds, nps);
-        }
-      else if (lex_match_phrase (lexer, "J-T")
-               || lex_match_phrase (lexer, "JONCKHEERE-TERPSTRA"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_jonckheere_terpstra (lexer, ds, nps);
-        }
-      else if (lex_match_phrase (lexer, "K-W")
-               || lex_match_phrase (lexer, "KRUSKAL-WALLIS"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_kruskal_wallis (lexer, ds, nps);
-        }
-      else if (lex_match_phrase (lexer, "MCNEMAR"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_mcnemar (lexer, ds, nps);
-        }
-      else if (lex_match_phrase (lexer, "M-W")
-               || lex_match_phrase (lexer, "MANN-WHITNEY"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_mann_whitney (lexer, ds, nps);
-        }
-      else if (lex_match_phrase (lexer, "MEDIAN"))
-        ok = npar_median (lexer, ds, nps);
-      else if (lex_match_id (lexer, "WILCOXON"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_wilcoxon (lexer, ds, nps);
-        }
-      else if (lex_match_id (lexer, "SIGN"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_sign (lexer, ds, nps);
-        }
-      else if (lex_match_id (lexer, "MISSING"))
-        {
-          lex_match (lexer, T_EQUALS);
-          if (have_missing)
-            {
-              lex_sbc_only_once (lexer, "MISSING");
-              ok = false;
-            }
-          else
-            {
-              have_missing = true;
-              ok = npar_parse_missing_keywords (lexer, nps);
-            }
-        }
-      else if (lex_match_id (lexer, "METHOD"))
-        {
-          lex_match (lexer, T_EQUALS);
-          if (have_method)
-            {
-              lex_sbc_only_once (lexer, "METHOD");
-              ok = false;
-            }
-          else
-            {
-              have_method = true;
-              ok = npar_method (lexer, nps);
-            }
-        }
-      else if (lex_match_id (lexer, "STATISTICS"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = npar_parse_statistics_keywords (lexer, nps);
-        }
-      else if (lex_match_id (lexer, "ALGORITHM"))
-        {
-          lex_match (lexer, T_EQUALS);
-          ok = true;
-          if (lex_match_id (lexer, "COMPATIBLE"))
-            settings_set_cmd_algorithm (COMPATIBLE);
-          else if (lex_match_id (lexer, "ENHANCED"))
-            settings_set_cmd_algorithm (ENHANCED);
-          else
-            {
-              lex_error_expecting (lexer, "COMPATIBLE", "ENHANCED");
-              ok = false;
-            }
-        }
-      else
-        {
-          lex_error_expecting (lexer, "COCHRAN", "FRIEDMAN", "KENDALL", "RUNS",
-                               "CHISQUARE", "BINOMIAL", "K-S", "J-T", "K-W",
-                               "MCNEMAR", "M-W", "MEDIAN", "WILCOXON",
-                               "SIGN", "MISSING", "METHOD", "STATISTICS",
-                               "ALGORITHM");
-          ok = false;
-        }
-
-      if (!ok)
-        return false;
-    }
-  while (lex_match (lexer, T_SLASH));
-
-  return true;
-}
-
-static void one_sample_insert_variables (const struct npar_test *test,
- struct hmapx *);
-
-static void two_sample_insert_variables (const struct npar_test *test,
- struct hmapx *);
-
-static void n_sample_insert_variables (const struct npar_test *test,
- struct hmapx *);
-
-/* Runs every test in SPECS against INPUT, then emits the summary box if
-   descriptives or quartiles were requested and the input is not tainted.
-   Called once per split group by cmd_npar_tests.
-
-   Each test is given its own clone of INPUT.  INPUT itself is destroyed
-   before returning, so the caller must not use it afterward. */
-static void
-npar_execute (struct casereader *input,
-              const struct npar_specs *specs,
-              const struct dataset *ds)
-{
-  struct descriptives *summary_descriptives = NULL;
-
-  for (size_t t = 0; t < specs->n_tests; ++t)
-    {
-      const struct npar_test *test = specs->test[t];
-      test->execute (ds, casereader_clone (input), specs->filter,
-                     test, specs->exact, specs->timer);
-    }
-
-  if (specs->descriptives && specs->n_vars > 0)
-    {
-      /* xnmalloc takes the element count first and the element size
-         second. */
-      summary_descriptives = xnmalloc (specs->n_vars,
-                                       sizeof *summary_descriptives);
-
-      npar_summary_calc_descriptives (summary_descriptives,
-                                      casereader_clone (input),
-                                      dataset_dict (ds),
-                                      specs->vv, specs->n_vars,
-                                      specs->filter);
-    }
-
-  /* SUMMARY_DESCRIPTIVES is still NULL here when only quartiles were
-     requested or there are no variables.
-     NOTE(review): presumably do_summary_box copes with a null
-     pointer; confirm in npar-summary.c. */
-  if ((specs->descriptives || specs->quartiles)
-      && !taint_has_tainted_successor (casereader_get_taint (input)))
-    do_summary_box (summary_descriptives, specs->vv, specs->n_vars,
-                    dict_get_weight_format (dataset_dict (ds)));
-
-  free (summary_descriptives);
-  casereader_destroy (input);
-}
-
-/* Implements the NPAR TESTS command: parses it, gathers the distinct
-   variables used by all the tests, and runs the tests once per split
-   group of DS's active data.
-
-   Returns CMD_FAILURE if parsing fails, CMD_CASCADING_FAILURE if reading
-   or committing the data fails, and CMD_SUCCESS otherwise. */
-int
-cmd_npar_tests (struct lexer *lexer, struct dataset *ds)
-{
-  struct npar_specs specs = {
-    .pool = pool_create (),
-    .filter = MV_ANY,
-    .listwise_missing = false,
-  };
-
-  if (!parse_npar_tests (lexer, ds, &specs))
-    {
-      pool_destroy (specs.pool);
-      return CMD_FAILURE;
-    }
-
-  /* Let each test contribute its variables; the map keeps one entry per
-     distinct variable. */
-  struct hmapx vars_seen = HMAPX_INITIALIZER (vars_seen);
-  for (size_t t = 0; t < specs.n_tests; t++)
-    {
-      const struct npar_test *test = specs.test[t];
-      test->insert_variables (test, &vars_seen);
-    }
-
-  /* Flatten the map into an array sorted by variable name. */
-  specs.vv = pool_alloc (specs.pool,
-                         hmapx_count (&vars_seen) * sizeof *specs.vv);
-  struct hmapx_node *node;
-  struct variable *v;
-  HMAPX_FOR_EACH (v, node, &vars_seen)
-    {
-      specs.vv[specs.n_vars] = v;
-      specs.n_vars++;
-    }
-  assert (specs.n_vars == hmapx_count (&vars_seen));
-
-  sort (specs.vv, specs.n_vars, sizeof *specs.vv,
-        compare_var_ptrs_by_name, NULL);
-
-  struct casereader *reader = proc_open (ds);
-  if (specs.listwise_missing)
-    reader = casereader_create_filter_missing (reader,
-                                               specs.vv, specs.n_vars,
-                                               specs.filter,
-                                               NULL, NULL);
-
-  struct casegrouper *splits
-    = casegrouper_create_splits (reader, dataset_dict (ds));
-  for (struct casereader *group;
-       casegrouper_get_next_group (splits, &group); )
-    npar_execute (group, &specs, ds);
-
-  bool ok = casegrouper_destroy (splits);
-  if (!proc_commit (ds))
-    ok = false;
-
-  pool_destroy (specs.pool);
-  hmapx_destroy (&vars_seen);
-
-  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
-}
-
-/* Appends NT to the array of tests in SPECS, growing the array (which
-   lives in SPECS's pool) by one element. */
-static void
-add_test (struct npar_specs *specs, struct npar_test *nt)
-{
-  size_t idx = specs->n_tests;
-
-  specs->test = pool_realloc (specs->pool, specs->test,
-                              (idx + 1) * sizeof *specs->test);
-  specs->test[idx] = nt;
-  specs->n_tests = idx + 1;
-}
-
-/* Parses the RUNS subcommand, whose syntax is a parenthesized cut point
-   (MEAN, MEDIAN, MODE or a number), "=", and a list of numeric variables.
-   On success appends a runs test to SPECS and returns true; otherwise
-   returns false. */
-static bool
-npar_runs (struct lexer *lexer, struct dataset *ds,
-           struct npar_specs *specs)
-{
-  struct runs_test *runs = pool_alloc (specs->pool, sizeof *runs);
-  struct one_sample_test *ost = &runs->parent;
-  struct npar_test *test = &ost->parent;
-
-  test->insert_variables = one_sample_insert_variables;
-  test->execute = runs_execute;
-
-  if (!lex_force_match (lexer, T_LPAREN))
-    return false;
-
-  if (lex_match_id (lexer, "MEAN"))
-    runs->cp_mode = CP_MEAN;
-  else if (lex_match_id (lexer, "MEDIAN"))
-    runs->cp_mode = CP_MEDIAN;
-  else if (lex_match_id (lexer, "MODE"))
-    runs->cp_mode = CP_MODE;
-  else if (!lex_is_number (lexer))
-    {
-      lex_error (lexer, _("Syntax error expecting %s, %s, %s or a number."),
-                 "MEAN", "MEDIAN", "MODE");
-      return false;
-    }
-  else
-    {
-      /* An explicit numeric cut point. */
-      runs->cutpoint = lex_number (lexer);
-      runs->cp_mode = CP_CUSTOM;
-      lex_get (lexer);
-    }
-
-  if (!lex_force_match_phrase (lexer, ")=")
-      || !parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
-                                      &ost->vars, &ost->n_vars,
-                                      PV_NO_SCRATCH | PV_NO_DUPLICATE
-                                      | PV_NUMERIC))
-    return false;
-
-  add_test (specs, test);
-  return true;
-}
-
-/* Parses the FRIEDMAN subcommand: an optional "=" followed by a list of
-   numeric variables.  On success appends a Friedman test (with the
-   Kendall's W flag off) to SPECS and returns true; otherwise returns
-   false. */
-static bool
-npar_friedman (struct lexer *lexer, struct dataset *ds,
-               struct npar_specs *specs)
-{
-  struct friedman_test *friedman = pool_alloc (specs->pool, sizeof *friedman);
-  struct one_sample_test *base = &friedman->parent;
-
-  friedman->kendalls_w = false;
-  base->parent.execute = friedman_execute;
-  base->parent.insert_variables = one_sample_insert_variables;
-
-  lex_match (lexer, T_EQUALS);
-
-  bool ok = parse_variables_const_pool (lexer, specs->pool,
-                                        dataset_dict (ds),
-                                        &base->vars, &base->n_vars,
-                                        PV_NO_SCRATCH | PV_NO_DUPLICATE
-                                        | PV_NUMERIC);
-  if (ok)
-    add_test (specs, &base->parent);
-  return ok;
-}
-
-/* Parses the KENDALL subcommand: an optional "=" followed by a list of
-   numeric variables.  The test is a Friedman test with the Kendall's W
-   flag set.  On success appends it to SPECS and returns true; otherwise
-   returns false. */
-static bool
-npar_kendall (struct lexer *lexer, struct dataset *ds,
-              struct npar_specs *specs)
-{
-  struct friedman_test *kendall = pool_alloc (specs->pool, sizeof *kendall);
-  struct one_sample_test *base = &kendall->parent;
-
-  kendall->kendalls_w = true;
-  base->parent.execute = friedman_execute;
-  base->parent.insert_variables = one_sample_insert_variables;
-
-  lex_match (lexer, T_EQUALS);
-
-  bool ok = parse_variables_const_pool (lexer, specs->pool,
-                                        dataset_dict (ds),
-                                        &base->vars, &base->n_vars,
-                                        PV_NO_SCRATCH | PV_NO_DUPLICATE
-                                        | PV_NUMERIC);
-  if (ok)
-    add_test (specs, &base->parent);
-  return ok;
-}
-
-
-/* Parses the COCHRAN subcommand: an optional "=" followed by a list of
-   numeric variables.  On success appends a Cochran test to SPECS and
-   returns true; otherwise returns false. */
-static bool
-npar_cochran (struct lexer *lexer, struct dataset *ds,
-              struct npar_specs *specs)
-{
-  struct one_sample_test *cochran = pool_alloc (specs->pool, sizeof *cochran);
-
-  cochran->parent.execute = cochran_execute;
-  cochran->parent.insert_variables = one_sample_insert_variables;
-
-  lex_match (lexer, T_EQUALS);
-
-  bool ok = parse_variables_const_pool (lexer, specs->pool,
-                                        dataset_dict (ds),
-                                        &cochran->vars, &cochran->n_vars,
-                                        PV_NO_SCRATCH | PV_NO_DUPLICATE
-                                        | PV_NUMERIC);
-  if (ok)
-    add_test (specs, &cochran->parent);
-  return ok;
-}
-
-/* Parses the CHISQUARE subcommand (the caller has already consumed the
-   keyword and optional "="):
-
-     varlist [(lo, hi)] [/EXPECTED = {EQUAL | [count*]value ...}]
-
-   Each expected value may carry a "count*" prefix that repeats it COUNT
-   times.  When both a range and explicit expected values are given, the
-   number of expected values must equal hi - lo + 1.
-
-   On success appends a chi-square test to SPECS and returns true.  On a
-   syntax error returns false after an error has been reported. */
-static bool
-npar_chisquare (struct lexer *lexer, struct dataset *ds,
-                struct npar_specs *specs)
-{
-  struct chisquare_test *cstp = pool_alloc (specs->pool, sizeof (*cstp));
-  struct one_sample_test *tp = &cstp->parent;
-  struct npar_test *nt = &tp->parent;
-
-  nt->execute = chisquare_execute;
-  nt->insert_variables = one_sample_insert_variables;
-
-  if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
-                                   &tp->vars, &tp->n_vars,
-                                   PV_NO_SCRATCH | PV_NO_DUPLICATE))
-    return false;
-
-  cstp->ranged = false;
-
-  if (lex_match (lexer, T_LPAREN))
-    {
-      cstp->ranged = true;
-      if (!lex_force_num (lexer))
-        return false;
-      cstp->lo = lex_number (lexer);
-      lex_get (lexer);
-
-      if (!lex_force_match (lexer, T_COMMA))
-        return false;
-      if (!lex_force_num_range_open (lexer, "HI", cstp->lo, DBL_MAX))
-        return false;
-      cstp->hi = lex_number (lexer);
-      lex_get (lexer);
-      if (!lex_force_match (lexer, T_RPAREN))
-        return false;
-    }
-
-  cstp->n_expected = 0;
-  cstp->expected = NULL;
-  int expected_start = 0;
-  int expected_end = 0;
-  if (lex_match_phrase (lexer, "/EXPECTED"))
-    {
-      if (!lex_force_match (lexer, T_EQUALS))
-        return false;
-
-      if (!lex_match_id (lexer, "EQUAL"))
-        {
-          expected_start = lex_ofs (lexer);
-          while (lex_is_number (lexer))
-            {
-              int n = 1;
-              int count_ofs = lex_ofs (lexer);
-              double f = lex_number (lexer);
-              lex_get (lexer);
-              if (lex_match (lexer, T_ASTERISK))
-                {
-                  /* F is a repeat count here, not an expected value.  It
-                     must be validated before conversion to int: a
-                     negative count would shrink the array and a huge one
-                     would overflow N_EXPECTED.  The negated form of the
-                     test also rejects NaN.
-                     NOTE(review): assumes n_expected is an int; confirm
-                     in chisquare.h. */
-                  int max_count = INT_MAX - cstp->n_expected;
-                  if (!(f >= 0 && f <= max_count))
-                    {
-                      lex_ofs_error (lexer, count_ofs, count_ofs,
-                                     _("Repeat count for expected values "
-                                       "must be between 0 and %d."),
-                                     max_count);
-                      return false;
-                    }
-                  n = f;
-                  if (!lex_force_num (lexer))
-                    return false;
-                  f = lex_number (lexer);
-                  lex_get (lexer);
-                }
-              lex_match (lexer, T_COMMA);
-
-              /* Append N copies of F. */
-              cstp->n_expected += n;
-              cstp->expected = pool_realloc (specs->pool,
-                                             cstp->expected,
-                                             sizeof (double) * cstp->n_expected);
-              for (int i = cstp->n_expected - n; i < cstp->n_expected; ++i)
-                cstp->expected[i] = f;
-            }
-          expected_end = lex_ofs (lexer) - 1;
-        }
-    }
-
-  if (cstp->ranged && cstp->n_expected > 0 &&
-      cstp->n_expected != cstp->hi - cstp->lo + 1)
-    {
-      lex_ofs_error (lexer, expected_start, expected_end,
-                     _("%d expected values were given, but the specified "
-                       "range (%d-%d) requires exactly %d values."),
-                     cstp->n_expected, cstp->lo, cstp->hi,
-                     cstp->hi - cstp->lo +1);
-      return false;
-    }
-
-  add_test (specs, nt);
-  return true;
-}
-
-/* Parses the BINOMIAL subcommand (the caller has already consumed the
-   keyword and optional "="):
-
-     [(p) =] varlist [(value1 [, value2])]
-
-   The test proportion defaults to 0.5.  A single trailing value is stored
-   both as the first category and as the cut point; two values are stored
-   as the two categories.  On success appends a binomial test to SPECS and
-   returns true; otherwise returns false. */
-static bool
-npar_binomial (struct lexer *lexer, struct dataset *ds,
-               struct npar_specs *specs)
-{
-  struct binomial_test *binom = pool_alloc (specs->pool, sizeof *binom);
-  struct one_sample_test *ost = &binom->parent;
-  struct npar_test *test = &ost->parent;
-
-  test->insert_variables = one_sample_insert_variables;
-  test->execute = binomial_execute;
-
-  binom->p = 0.5;
-  binom->category1 = binom->category2 = binom->cutpoint = SYSMIS;
-
-  if (lex_match (lexer, T_LPAREN))
-    {
-      if (!lex_force_num (lexer))
-        return false;
-      binom->p = lex_number (lexer);
-      lex_get (lexer);
-
-      if (!lex_force_match (lexer, T_RPAREN)
-          || !lex_force_match (lexer, T_EQUALS))
-        return false;
-    }
-
-  if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
-                                   &ost->vars, &ost->n_vars,
-                                   PV_NUMERIC | PV_NO_SCRATCH
-                                   | PV_NO_DUPLICATE))
-    return false;
-
-  if (lex_match (lexer, T_LPAREN))
-    {
-      if (!lex_force_num (lexer))
-        return false;
-      binom->category1 = lex_number (lexer);
-      lex_get (lexer);
-
-      if (!lex_match (lexer, T_COMMA))
-        binom->cutpoint = binom->category1;
-      else
-        {
-          if (!lex_force_num (lexer))
-            return false;
-          binom->category2 = lex_number (lexer);
-          lex_get (lexer);
-        }
-
-      if (!lex_force_match (lexer, T_RPAREN))
-        return false;
-    }
-
-  add_test (specs, test);
-  return true;
-}
-
-/* Parses the optional numeric parameters of a K-S distribution into
-   KST->p.  PARAMS, which must be 1 or 2, is the number of parameters the
-   distribution takes.
-
-   If the next token is not a number, nothing is consumed and KST->p is
-   left as it was.  Once a first parameter has been read for a
-   two-parameter distribution, the second is required (an optional comma
-   may separate them).
-
-   Returns true on success.  Returns false if the second parameter is
-   missing; lex_force_num has reported the error by then.  The function
-   used to return void, so that error was reported but parsing carried on
-   and the test was still queued with its second parameter unset. */
-static bool
-ks_one_sample_parse_params (struct lexer *lexer, struct ks_one_sample_test *kst, int params)
-{
-  assert (params == 1 || params == 2);
-
-  if (!lex_is_number (lexer))
-    return true;
-
-  kst->p[0] = lex_number (lexer);
-  lex_get (lexer);
-
-  if (params == 2)
-    {
-      lex_match (lexer, T_COMMA);
-      if (!lex_force_num (lexer))
-        return false;
-      kst->p[1] = lex_number (lexer);
-      lex_get (lexer);
-    }
-
-  return true;
-}
-
-/* Parses the K-S (KOLMOGOROV-SMIRNOV) subcommand (the caller has already
-   consumed the keyword and optional "="):
-
-     (NORMAL|POISSON|UNIFORM|EXPONENTIAL [params]) [=] varlist
-
-   NORMAL and UNIFORM take up to two parameters, POISSON and EXPONENTIAL
-   take one; parameters that are not given stay SYSMIS.  On success
-   appends the test to SPECS and returns true.  On a syntax error returns
-   false after an error has been reported. */
-static bool
-npar_ks_one_sample (struct lexer *lexer, struct dataset *ds, struct npar_specs *specs)
-{
-  struct ks_one_sample_test *kst = pool_alloc (specs->pool, sizeof (*kst));
-  struct one_sample_test *tp = &kst->parent;
-  struct npar_test *nt = &tp->parent;
-
-  nt->execute = ks_one_sample_execute;
-  nt->insert_variables = one_sample_insert_variables;
-
-  kst->p[0] = kst->p[1] = SYSMIS;
-
-  if (!lex_force_match (lexer, T_LPAREN))
-    return false;
-
-  int n_params;
-  if (lex_match_id (lexer, "NORMAL"))
-    {
-      kst->dist = KS_NORMAL;
-      n_params = 2;
-    }
-  else if (lex_match_id (lexer, "POISSON"))
-    {
-      kst->dist = KS_POISSON;
-      n_params = 1;
-    }
-  else if (lex_match_id (lexer, "UNIFORM"))
-    {
-      kst->dist = KS_UNIFORM;
-      n_params = 2;
-    }
-  else if (lex_match_id (lexer, "EXPONENTIAL"))
-    {
-      kst->dist = KS_EXPONENTIAL;
-      n_params = 1;
-    }
-  else
-    {
-      lex_error_expecting (lexer, "NORMAL", "POISSON", "UNIFORM",
-                           "EXPONENTIAL");
-      return false;
-    }
-
-  /* Stop at the first reported error rather than queueing a
-     half-specified test. */
-  if (!ks_one_sample_parse_params (lexer, kst, n_params))
-    return false;
-
-  if (!lex_force_match (lexer, T_RPAREN))
-    return false;
-
-  lex_match (lexer, T_EQUALS);
-
-  if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
-                                   &tp->vars, &tp->n_vars,
-                                   PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE))
-    return false;
-
-  add_test (specs, nt);
-
-  return true;
-}
-
-/* Parses the variable specification shared by the two-related-samples
-   tests and fills in TP's list of variable pairs, allocated from POOL:
-
-     varlist [WITH varlist [(PAIRED)]]
-
-   - No WITH: every unordered pair drawn from the single list.
-   - WITH: every variable before WITH paired with every variable after.
-   - WITH ... (PAIRED): the Ith variable before WITH paired with the Ith
-     after; both lists must have the same length.
-
-   Also sets TP's insert_variables callback.  Returns true on success,
-   false after an error has been reported. */
-static bool
-parse_two_sample_related_test (struct lexer *lexer,
-                               const struct dictionary *dict,
-                               struct two_sample_test *tp,
-                               struct pool *pool)
-{
-  tp->parent.insert_variables = two_sample_insert_variables;
-
-  int first_ofs = lex_ofs (lexer);
-  const struct variable **lhs;
-  size_t n_lhs;
-  if (!parse_variables_const_pool (lexer, pool, dict, &lhs, &n_lhs,
-                                   PV_NUMERIC | PV_NO_SCRATCH | PV_DUPLICATE))
-    return false;
-
-  const struct variable **rhs = NULL;
-  size_t n_rhs = 0;
-  bool paired = false;
-  bool with = lex_match (lexer, T_WITH);
-  if (with)
-    {
-      if (!parse_variables_const_pool (lexer, pool, dict, &rhs, &n_rhs,
-                                       PV_NUMERIC | PV_NO_SCRATCH
-                                       | PV_DUPLICATE))
-        return false;
-      int last_ofs = lex_ofs (lexer) - 1;
-
-      if (lex_match (lexer, T_LPAREN))
-        {
-          if (!lex_force_match_phrase (lexer, "PAIRED)"))
-            return false;
-          paired = true;
-
-          if (n_lhs != n_rhs)
-            {
-              lex_ofs_error (lexer, first_ofs, last_ofs,
-                             _("PAIRED was specified, but the number of "
-                               "variables preceding WITH (%zu) does not match "
-                               "the number following (%zu)."),
-                             n_lhs, n_rhs);
-              return false;
-            }
-        }
-    }
-
-  if (paired)
-    tp->n_pairs = n_lhs;
-  else if (with)
-    tp->n_pairs = n_lhs * n_rhs;
-  else
-    tp->n_pairs = (n_lhs * (n_lhs - 1)) / 2;
-  tp->pairs = pool_alloc (pool, sizeof (variable_pair) * tp->n_pairs);
-
-  size_t k = 0;
-  if (paired)
-    {
-      /* PAIRED can only be set after WITH, so both lists exist. */
-      assert (n_lhs == n_rhs);
-      for (size_t i = 0; i < n_lhs; i++, k++)
-        {
-          tp->pairs[k][0] = lhs[i];
-          tp->pairs[k][1] = rhs[i];
-        }
-    }
-  else if (with)
-    {
-      for (size_t i = 0; i < n_lhs; i++)
-        for (size_t j = 0; j < n_rhs; j++, k++)
-          {
-            tp->pairs[k][0] = lhs[i];
-            tp->pairs[k][1] = rhs[j];
-          }
-    }
-  else
-    {
-      for (size_t i = 0; i < n_lhs - 1; i++)
-        for (size_t j = i + 1; j < n_lhs; j++, k++)
-          {
-            assert (k < tp->n_pairs);
-            tp->pairs[k][0] = lhs[i];
-            tp->pairs[k][1] = lhs[j];
-          }
-    }
-  assert (k == tp->n_pairs);
-
-  return true;
-}
-
-/* Parses the variable specification shared by the tests that compare
-   groups defined by an independent variable:
-
-     varlist BY var (value1 [,] value2)
-
-   The dependent variables are stored in NST->vars (allocated from POOL),
-   the grouping variable in NST->indep_var, and the two values in
-   NST->val1 and NST->val2.
-
-   Returns true on success.  On failure returns false after an error has
-   been reported, and every value initialized so far has been destroyed,
-   so the caller has nothing to release.
-   NOTE(review): nothing in this file destroys val1/val2 after a
-   successful parse; confirm that the test implementations do. */
-static bool
-parse_n_sample_related_test (struct lexer *lexer, const struct dictionary *dict,
-                             struct n_sample_test *nst, struct pool *pool)
-{
-  if (!parse_variables_const_pool (lexer, pool, dict, &nst->vars, &nst->n_vars,
-                                   PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE))
-    return false;
-
-  if (!lex_force_match (lexer, T_BY))
-    return false;
-
-  nst->indep_var = parse_variable_const (lexer, dict);
-  if (!nst->indep_var)
-    return false;
-
-  if (!lex_force_match (lexer, T_LPAREN))
-    return false;
-
-  const int width = var_get_width (nst->indep_var);
-
-  value_init (&nst->val1, width);
-  if (!parse_value (lexer, &nst->val1, nst->indep_var))
-    goto error_val1;
-
-  lex_match (lexer, T_COMMA);
-
-  value_init (&nst->val2, width);
-  if (!parse_value (lexer, &nst->val2, nst->indep_var))
-    goto error_val2;
-
-  if (!lex_force_match (lexer, T_RPAREN))
-    goto error_val2;
-
-  return true;
-
-  /* Unwind in reverse order of initialization.  The original code left
-     val1 initialized when val2 failed to parse, and left both initialized
-     when the closing parenthesis was missing. */
-error_val2:
-  value_destroy (&nst->val2, width);
-error_val1:
-  value_destroy (&nst->val1, width);
-  return false;
-}
-
-/* Parses the WILCOXON subcommand, which uses the two-related-samples
-   variable syntax.  On success appends a Wilcoxon test to SPECS and
-   returns true; otherwise returns false. */
-static bool
-npar_wilcoxon (struct lexer *lexer,
-               struct dataset *ds,
-               struct npar_specs *specs)
-{
-  struct two_sample_test *wilcoxon = pool_alloc (specs->pool,
-                                                 sizeof *wilcoxon);
-
-  wilcoxon->parent.execute = wilcoxon_execute;
-
-  bool ok = parse_two_sample_related_test (lexer, dataset_dict (ds),
-                                           wilcoxon, specs->pool);
-  if (ok)
-    add_test (specs, &wilcoxon->parent);
-  return ok;
-}
-
-/* Parses the M-W (MANN-WHITNEY) subcommand, which uses the "varlist BY
-   var (v1, v2)" syntax.  On success appends a Mann-Whitney test to SPECS
-   and returns true; otherwise returns false. */
-static bool
-npar_mann_whitney (struct lexer *lexer,
-                   struct dataset *ds,
-                   struct npar_specs *specs)
-{
-  struct n_sample_test *mw = pool_alloc (specs->pool, sizeof *mw);
-
-  mw->parent.execute = mann_whitney_execute;
-  mw->parent.insert_variables = n_sample_insert_variables;
-
-  bool ok = parse_n_sample_related_test (lexer, dataset_dict (ds),
-                                         mw, specs->pool);
-  if (ok)
-    add_test (specs, &mw->parent);
-  return ok;
-}
-
-/* Parses the MEDIAN subcommand:
-
-     [(median)] [=] varlist BY var (v1, v2)
-
-   The median stays SYSMIS unless one is given explicitly in parentheses.
-   On success appends a median test to SPECS and returns true; otherwise
-   returns false. */
-static bool
-npar_median (struct lexer *lexer,
-             struct dataset *ds,
-             struct npar_specs *specs)
-{
-  struct median_test *med = pool_alloc (specs->pool, sizeof *med);
-  struct n_sample_test *nst = &med->parent;
-
-  med->median = SYSMIS;
-
-  if (lex_match (lexer, T_LPAREN))
-    {
-      if (!lex_force_num (lexer))
-        return false;
-      med->median = lex_number (lexer);
-      lex_get (lexer);
-
-      if (!lex_force_match (lexer, T_RPAREN))
-        return false;
-    }
-
-  lex_match (lexer, T_EQUALS);
-
-  nst->parent.execute = median_execute;
-  nst->parent.insert_variables = n_sample_insert_variables;
-
-  bool ok = parse_n_sample_related_test (lexer, dataset_dict (ds),
-                                         nst, specs->pool);
-  if (ok)
-    add_test (specs, &nst->parent);
-  return ok;
-}
-
-/* Parses the SIGN subcommand, which uses the two-related-samples variable
-   syntax.  On success appends a sign test to SPECS and returns true;
-   otherwise returns false. */
-static bool
-npar_sign (struct lexer *lexer, struct dataset *ds,
-           struct npar_specs *specs)
-{
-  struct two_sample_test *sign = pool_alloc (specs->pool, sizeof *sign);
-
-  sign->parent.execute = sign_execute;
-
-  bool ok = parse_two_sample_related_test (lexer, dataset_dict (ds),
-                                           sign, specs->pool);
-  if (ok)
-    add_test (specs, &sign->parent);
-  return ok;
-}
-
-/* Parses the MCNEMAR subcommand, which uses the two-related-samples
-   variable syntax.  On success appends a McNemar test to SPECS and
-   returns true; otherwise returns false. */
-static bool
-npar_mcnemar (struct lexer *lexer, struct dataset *ds,
-              struct npar_specs *specs)
-{
-  struct two_sample_test *mcnemar = pool_alloc (specs->pool, sizeof *mcnemar);
-
-  mcnemar->parent.execute = mcnemar_execute;
-
-  bool ok = parse_two_sample_related_test (lexer, dataset_dict (ds),
-                                           mcnemar, specs->pool);
-  if (ok)
-    add_test (specs, &mcnemar->parent);
-  return ok;
-}
-
-
-/* Parses the J-T (JONCKHEERE-TERPSTRA) subcommand, which uses the
-   "varlist BY var (v1, v2)" syntax.  On success appends the test to SPECS
-   and returns true; otherwise returns false. */
-static bool
-npar_jonckheere_terpstra (struct lexer *lexer, struct dataset *ds,
-                          struct npar_specs *specs)
-{
-  struct n_sample_test *jt = pool_alloc (specs->pool, sizeof *jt);
-
-  jt->parent.execute = jonckheere_terpstra_execute;
-  jt->parent.insert_variables = n_sample_insert_variables;
-
-  bool ok = parse_n_sample_related_test (lexer, dataset_dict (ds),
-                                         jt, specs->pool);
-  if (ok)
-    add_test (specs, &jt->parent);
-  return ok;
-}
-
-/* Parses the K-W (KRUSKAL-WALLIS) subcommand, which uses the "varlist BY
-   var (v1, v2)" syntax.  On success appends the test to SPECS and returns
-   true; otherwise returns false. */
-static bool
-npar_kruskal_wallis (struct lexer *lexer, struct dataset *ds,
-                     struct npar_specs *specs)
-{
-  struct n_sample_test *kw = pool_alloc (specs->pool, sizeof *kw);
-
-  kw->parent.execute = kruskal_wallis_execute;
-  kw->parent.insert_variables = n_sample_insert_variables;
-
-  bool ok = parse_n_sample_related_test (lexer, dataset_dict (ds),
-                                         kw, specs->pool);
-  if (ok)
-    add_test (specs, &kw->parent);
-  return ok;
-}
-
-/* Adds VAR to VAR_MAP, keyed by the hash of its address, unless that same
-   variable is already in the map. */
-static void
-insert_variable_into_map (struct hmapx *var_map, const struct variable *var)
-{
-  const size_t key = hash_pointer (var, 0);
-  const struct variable *candidate = NULL;
-  struct hmapx_node *iter;
-
-  HMAPX_FOR_EACH_WITH_HASH (candidate, iter, key, var_map)
-    {
-      if (candidate == var)
-        return;
-    }
-
-  /* The map stores non-const pointers, so the qualifier is cast away for
-     storage. */
-  hmapx_insert (var_map, CONST_CAST (struct variable *, var), key);
-}
-
-/* insert_variables callback for one-sample tests: adds each variable of
-   TEST to VAR_MAP. */
-static void
-one_sample_insert_variables (const struct npar_test *test,
-                             struct hmapx *var_map)
-{
-  const struct one_sample_test *one
-    = UP_CAST (test, const struct one_sample_test, parent);
-  size_t idx = 0;
-
-  while (idx < one->n_vars)
-    {
-      insert_variable_into_map (var_map, one->vars[idx]);
-      idx++;
-    }
-}
-
-
-/* insert_variables callback for two-related-samples tests: adds both
-   members of every variable pair in TEST to VAR_MAP. */
-static void
-two_sample_insert_variables (const struct npar_test *test,
-                             struct hmapx *var_map)
-{
-  const struct two_sample_test *two
-    = UP_CAST (test, const struct two_sample_test, parent);
-
-  for (size_t p = 0; p < two->n_pairs; p++)
-    {
-      insert_variable_into_map (var_map, two->pairs[p][0]);
-      insert_variable_into_map (var_map, two->pairs[p][1]);
-    }
-}
-
-/* insert_variables callback for the "varlist BY var" tests: adds each
-   dependent variable of TEST to VAR_MAP, then the grouping variable. */
-static void
-n_sample_insert_variables (const struct npar_test *test,
-                           struct hmapx *var_map)
-{
-  const struct n_sample_test *nst
-    = UP_CAST (test, const struct n_sample_test, parent);
-  size_t idx = 0;
-
-  while (idx < nst->n_vars)
-    {
-      insert_variable_into_map (var_map, nst->vars[idx]);
-      idx++;
-    }
-
-  insert_variable_into_map (var_map, nst->indep_var);
-}
-
-/* Parses the body of the METHOD subcommand:
-
-     [EXACT [TIMER [(minutes)]]]
-
-   EXACT turns on exact calculations with the timer set to 0.  TIMER alone
-   sets the timer to 5, and TIMER(n) sets it to N.  If EXACT is absent,
-   nothing is consumed and SPECS is left unchanged.
-
-   Returns false only if the parenthesized timer value is malformed. */
-static bool
-npar_method (struct lexer *lexer, struct npar_specs *specs)
-{
-  if (!lex_match_id (lexer, "EXACT"))
-    return true;
-
-  specs->exact = true;
-  specs->timer = 0.0;
-
-  if (!lex_match_id (lexer, "TIMER"))
-    return true;
-
-  /* Default used when TIMER is given without a value. */
-  specs->timer = 5.0;
-
-  if (!lex_match (lexer, T_LPAREN))
-    return true;
-
-  if (!lex_force_num (lexer))
-    return false;
-  specs->timer = lex_number (lexer);
-  lex_get (lexer);
-
-  if (!lex_force_match (lexer, T_RPAREN))
-    return false;
-
-  return true;
-}