From 631e2243322503338b497cd13529fbb2c5833e3d Mon Sep 17 00:00:00 2001 From: John Darrington Date: Fri, 1 Jul 2011 20:04:58 +0200 Subject: [PATCH] Implemented the POSTHOC subcommand for the ONEWAY command. Currently, only LSD, Tukey, Bonferroni, Scheffe, Games-Howell and Sidak are supported. These are presented as Multiple Comparisons - the Homogeneous Subsets are not yet implemented. --- NEWS | 2 + doc/statistics.texi | 25 +- lib/automake.mk | 1 + lib/tukey/README | 1 + lib/tukey/automake.mk | 11 + lib/tukey/ptukey.c | 486 +++++++++++++++++++++++++++++++++ lib/tukey/qtukey.c | 253 +++++++++++++++++ lib/tukey/tukey.h | 25 ++ src/language/stats/oneway.c | 458 ++++++++++++++++++++++++++++--- src/math/automake.mk | 3 +- tests/language/stats/oneway.at | 352 +++++++++++++++++++++++- 11 files changed, 1578 insertions(+), 39 deletions(-) create mode 100644 lib/tukey/README create mode 100644 lib/tukey/automake.mk create mode 100644 lib/tukey/ptukey.c create mode 100644 lib/tukey/qtukey.c create mode 100644 lib/tukey/tukey.h diff --git a/NEWS b/NEWS index bd228d919a..5509d344af 100644 --- a/NEWS +++ b/NEWS @@ -44,6 +44,8 @@ Changes from 0.6.2 to 0.7.8: - MISSING VALUES can now assign missing values to long string variables. + - ONEWAY: the POSTHOC subcommand is now implemented. + - The following new subcommands to NPAR TESTS have been implemented: COCHRAN, FRIEDMAN, KENDALL, KRUSKAL-WALLIS, MANN-WHITNEY, MCNEMAR, SIGN, WILCOXON, and RUNS diff --git a/doc/statistics.texi b/doc/statistics.texi index 18fa79ac52..599e7e4290 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -1101,7 +1101,7 @@ ONEWAY /MISSING=@{ANALYSIS,LISTWISE@} @{EXCLUDE,INCLUDE@} /CONTRAST= value1 [, value2] ... [,valueN] /STATISTICS=@{DESCRIPTIVES,HOMOGENEITY@} - + /POSTHOC=@{BONFERRONI, GH, LSD, SCHEFFE, SIDAK, TUKEY, ALPHA ([value])@} @end display The @cmd{ONEWAY} procedure performs a one-way analysis of variance of @@ -1145,6 +1145,29 @@ A setting of EXCLUDE means that variables whose values are user-missing are to be excluded from the analysis. A setting of INCLUDE means they are to be included. The default is EXCLUDE. +Using the @code{POSTHOC} subcommand you can perform multiple +pairwise comparisons on the data. The following comparison methods +are available: +@itemize +@item LSD +Least Significant Difference. +@item TUKEY +Tukey Honestly Significant Difference. +@item BONFERRONI +Bonferroni test. +@item SCHEFFE +Scheff@'e's test. +@item SIDAK +Sidak test. +@item GH +The Games-Howell test. +@end itemize + +@noindent +The optional syntax @code{ALPHA(@var{value})} is used to indicate +that @var{value} should be used as the +confidence level for which the posthoc tests will be performed. +The default is 0.05. @node RANK @comment node-name, next, previous, up diff --git a/lib/automake.mk b/lib/automake.mk index 9bde4afcf2..3fb94bfe6f 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -1,6 +1,7 @@ ## Process this file with automake to produce Makefile.in -*- makefile -*- include $(top_srcdir)/lib/linreg/automake.mk +include $(top_srcdir)/lib/tukey/automake.mk if HAVE_GUI include $(top_srcdir)/lib/gtk-contrib/automake.mk diff --git a/lib/tukey/README b/lib/tukey/README new file mode 100644 index 0000000000..e427872e26 --- /dev/null +++ b/lib/tukey/README @@ -0,0 +1 @@ +This is not part of the GNU PSPP program, but is used with GNU PSPP. diff --git a/lib/tukey/automake.mk b/lib/tukey/automake.mk new file mode 100644 index 0000000000..85bcd629d1 --- /dev/null +++ b/lib/tukey/automake.mk @@ -0,0 +1,11 @@ +## Process this file with automake to produce Makefile.in -*- makefile -*- + +noinst_LTLIBRARIES += lib/tukey/libtukey.la + +lib_tukey_libtukey_la_SOURCES = \ + lib/tukey/ptukey.c \ + lib/tukey/qtukey.c \ + lib/tukey/tukey.h + +EXTRA_DIST += lib/tukey/README + diff --git a/lib/tukey/ptukey.c b/lib/tukey/ptukey.c new file mode 100644 index 0000000000..c86ef23613 --- /dev/null +++ b/lib/tukey/ptukey.c @@ -0,0 +1,486 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* This file is taken from the R project source code, and modified. + The original copyright notice is reproduced below: */ + +/* + * Mathlib : A C Library of Special Functions + * Copyright (C) 1998 Ross Ihaka + * Copyright (C) 2000--2007 The R Development Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, a copy is available at + * http://www.r-project.org/Licenses/ + * + * SYNOPSIS + * + * #include + * double ptukey(q, rr, cc, df, lower_tail, log_p); + * + * DESCRIPTION + * + * Computes the probability that the maximum of rr studentized + * ranges, each based on cc means and with df degrees of freedom + * for the standard error, is less than q. + * + * The algorithm is based on that of the reference. + * + * REFERENCE + * + * Copenhaver, Margaret Diponzio & Holland, Burt S. + * Multiple comparisons of simple effects in + * the two-way analysis of variance with fixed effects. + * Journal of Statistical Computation and Simulation, + * Vol.30, pp.1-15, 1988. + */ + + + +#include + +#include "libpspp/compiler.h" +#include "tukey.h" + +#include +#include +#include +#include + +#define R_D__0 (log_p ? ML_NEGINF : 0.) /* 0 */ +#define R_D__1 (log_p ? 0. : 1.) /* 1 */ +#define R_DT_0 (lower_tail ? R_D__0 : R_D__1) /* 0 */ +#define R_DT_1 (lower_tail ? R_D__1 : R_D__0) /* 1 */ + +#define R_D_val(x) (log_p ? log(x) : (x)) /* x in pF(x,..) */ +#define R_D_Clog(p) (log_p ? log1p(-(p)) : (0.5 - (p) + 0.5)) /* [log](1-p) */ +#define R_DT_val(x) (lower_tail ? R_D_val(x) : R_D_Clog(x)) + + +#define ME_PRECISION 8 + + +static inline double +pnorm(double x, double mu, double sigma, int lower_tail, int log_p) +{ + assert (lower_tail == 1); + assert (log_p == 0); + assert (sigma == 1.0); + + return gsl_cdf_gaussian_P (x - mu, sigma); +} + + +static double +wprob (double w, double rr, double cc) +{ + const double M_1_SQRT_2PI = 1 / sqrt (2 * M_PI); + + +/* wprob() : + + This function calculates probability integral of Hartley's + form of the range. + + w = value of range + rr = no. of rows or groups + cc = no. of columns or treatments + ir = error flag = 1 if pr_w probability > 1 + pr_w = returned probability integral from (0, w) + + program will not terminate if ir is raised. + + bb = upper limit of legendre integration + iMax = maximum acceptable value of integral + nleg = order of legendre quadrature + ihalf = int ((nleg + 1) / 2) + wlar = value of range above which wincr1 intervals are used to + calculate second part of integral, + else wincr2 intervals are used. + C1, C2, C3 = values which are used as cutoffs for terminating + or modifying a calculation. + + M_1_SQRT_2PI = 1 / sqrt(2 * pi); from abramowitz & stegun, p. 3. + M_SQRT2 = sqrt(2) + xleg = legendre 12-point nodes + aleg = legendre 12-point coefficients + */ +#define nleg 12 +#define ihalf 6 + + /* looks like this is suboptimal for double precision. + (see how C1-C3 are used) + */ + /* const double iMax = 1.; not used if = 1 */ + static const double C1 = -30.; + static const double C2 = -50.; + static const double C3 = 60.; + static const double bb = 8.; + static const double wlar = 3.; + static const double wincr1 = 2.; + static const double wincr2 = 3.; + static const double xleg[ihalf] = { + 0.981560634246719250690549090149, + 0.904117256370474856678465866119, + 0.769902674194304687036893833213, + 0.587317954286617447296702418941, + 0.367831498998180193752691536644, + 0.125233408511468915472441369464 + }; + static const double aleg[ihalf] = { + 0.047175336386511827194615961485, + 0.106939325995318430960254718194, + 0.160078328543346226334652529543, + 0.203167426723065921749064455810, + 0.233492536538354808760849898925, + 0.249147045813402785000562436043 + }; + double a, ac, pr_w, b, binc, blb, c, cc1, + pminus, pplus, qexpo, qsqz, rinsum, wi, wincr, xx; + long double bub, einsum, elsum; + int j, jj; + + + qsqz = w * 0.5; + + /* if w >= 16 then the integral lower bound (occurs for c=20) */ + /* is 0.99999999999995 so return a value of 1. */ + + if (qsqz >= bb) + return 1.0; + + /* find (f(w/2) - 1) ^ cc */ + /* (first term in integral of hartley's form). */ + + pr_w = 2 * pnorm (qsqz, 0., 1., 1, 0) - 1.; /* erf(qsqz / M_SQRT2) */ + /* if pr_w ^ cc < 2e-22 then set pr_w = 0 */ + if (pr_w >= exp (C2 / cc)) + pr_w = pow (pr_w, cc); + else + pr_w = 0.0; + + /* if w is large then the second component of the */ + /* integral is small, so fewer intervals are needed. */ + + if (w > wlar) + wincr = wincr1; + else + wincr = wincr2; + + /* find the integral of second term of hartley's form */ + /* for the integral of the range for equal-length */ + /* intervals using legendre quadrature. limits of */ + /* integration are from (w/2, 8). two or three */ + /* equal-length intervals are used. */ + + /* blb and bub are lower and upper limits of integration. */ + + blb = qsqz; + binc = (bb - qsqz) / wincr; + bub = blb + binc; + einsum = 0.0; + + /* integrate over each interval */ + + cc1 = cc - 1.0; + for (wi = 1; wi <= wincr; wi++) + { + elsum = 0.0; + a = 0.5 * (bub + blb); + + /* legendre quadrature with order = nleg */ + + b = 0.5 * (bub - blb); + + for (jj = 1; jj <= nleg; jj++) + { + if (ihalf < jj) + { + j = (nleg - jj) + 1; + xx = xleg[j - 1]; + } + else + { + j = jj; + xx = -xleg[j - 1]; + } + c = b * xx; + ac = a + c; + + /* if exp(-qexpo/2) < 9e-14, */ + /* then doesn't contribute to integral */ + + qexpo = ac * ac; + if (qexpo > C3) + break; + + pplus = 2 * pnorm (ac, 0., 1., 1, 0); + pminus = 2 * pnorm (ac, w, 1., 1, 0); + + /* if rinsum ^ (cc-1) < 9e-14, */ + /* then doesn't contribute to integral */ + + rinsum = (pplus * 0.5) - (pminus * 0.5); + if (rinsum >= exp (C1 / cc1)) + { + rinsum = + (aleg[j - 1] * exp (-(0.5 * qexpo))) * pow (rinsum, cc1); + elsum += rinsum; + } + } + elsum *= (((2.0 * b) * cc) * M_1_SQRT_2PI); + einsum += elsum; + blb = bub; + bub += binc; + } + + /* if pr_w ^ rr < 9e-14, then return 0 */ + pr_w = einsum + pr_w; + if (pr_w <= exp (C1 / rr)) + return 0.; + + pr_w = pow (pr_w, rr); + if (pr_w >= 1.) /* 1 was iMax was eps */ + return 1.; + return pr_w; +} /* wprob() */ + +double +ptukey (double q, double rr, double cc, double df, int lower_tail, int log_p) +{ + const double ML_NEGINF = -1.0 / 0.0; +/* function ptukey() [was qprob() ]: + + q = value of studentized range + rr = no. of rows or groups + cc = no. of columns or treatments + df = degrees of freedom of error term + ir[0] = error flag = 1 if wprob probability > 1 + ir[1] = error flag = 1 if qprob probability > 1 + + qprob = returned probability integral over [0, q] + + The program will not terminate if ir[0] or ir[1] are raised. + + All references in wprob to Abramowitz and Stegun + are from the following reference: + + Abramowitz, Milton and Stegun, Irene A. + Handbook of Mathematical Functions. + New York: Dover publications, Inc. (1970). + + All constants taken from this text are + given to 25 significant digits. + + nlegq = order of legendre quadrature + ihalfq = int ((nlegq + 1) / 2) + eps = max. allowable value of integral + eps1 & eps2 = values below which there is + no contribution to integral. + + d.f. <= dhaf: integral is divided into ulen1 length intervals. else + d.f. <= dquar: integral is divided into ulen2 length intervals. else + d.f. <= deigh: integral is divided into ulen3 length intervals. else + d.f. <= dlarg: integral is divided into ulen4 length intervals. + + d.f. > dlarg: the range is used to calculate integral. + + M_LN2 = log(2) + + xlegq = legendre 16-point nodes + alegq = legendre 16-point coefficients + + The coefficients and nodes for the legendre quadrature used in + qprob and wprob were calculated using the algorithms found in: + + Stroud, A. H. and Secrest, D. + Gaussian Quadrature Formulas. + Englewood Cliffs, + New Jersey: Prentice-Hall, Inc, 1966. + + All values matched the tables (provided in same reference) + to 30 significant digits. + + f(x) = .5 + erf(x / sqrt(2)) / 2 for x > 0 + + f(x) = erfc( -x / sqrt(2)) / 2 for x < 0 + + where f(x) is standard normal c. d. f. + + if degrees of freedom large, approximate integral + with range distribution. + */ +#define nlegq 16 +#define ihalfq 8 + +/* const double eps = 1.0; not used if = 1 */ + static const double eps1 = -30.0; + static const double eps2 = 1.0e-14; + static const double dhaf = 100.0; + static const double dquar = 800.0; + static const double deigh = 5000.0; + static const double dlarg = 25000.0; + static const double ulen1 = 1.0; + static const double ulen2 = 0.5; + static const double ulen3 = 0.25; + static const double ulen4 = 0.125; + static const double xlegq[ihalfq] = { + 0.989400934991649932596154173450, + 0.944575023073232576077988415535, + 0.865631202387831743880467897712, + 0.755404408355003033895101194847, + 0.617876244402643748446671764049, + 0.458016777657227386342419442984, + 0.281603550779258913230460501460, + 0.950125098376374401853193354250e-1 + }; + static const double alegq[ihalfq] = { + 0.271524594117540948517805724560e-1, + 0.622535239386478928628438369944e-1, + 0.951585116824927848099251076022e-1, + 0.124628971255533872052476282192, + 0.149595988816576732081501730547, + 0.169156519395002538189312079030, + 0.182603415044923588866763667969, + 0.189450610455068496285396723208 + }; + double ans, f2, f21, f2lf, ff4, otsum, qsqz, rotsum, t1, twa1, ulen, wprb; + int i, j, jj; + +#ifdef IEEE_754 + abort (! (ISNAN (q) || ISNAN (rr) || ISNAN (cc) || ISNAN (df))); +#endif + + if (q <= 0) + return R_DT_0; + + /* df must be > 1 */ + /* there must be at least two values */ + assert (! (df < 2 || rr < 1 || cc < 2)); + + if (!isfinite (q)) + return R_DT_1; + + if (df > dlarg) + return R_DT_val (wprob (q, rr, cc)); + + /* calculate leading constant */ + + f2 = df * 0.5; + /* lgammafn(u) = log(gamma(u)) */ + f2lf = ((f2 * log (df)) - (df * M_LN2)) - gsl_sf_lngamma (f2); + f21 = f2 - 1.0; + + /* integral is divided into unit, half-unit, quarter-unit, or */ + /* eighth-unit length intervals depending on the value of the */ + /* degrees of freedom. */ + + ff4 = df * 0.25; + if (df <= dhaf) + ulen = ulen1; + else if (df <= dquar) + ulen = ulen2; + else if (df <= deigh) + ulen = ulen3; + else + ulen = ulen4; + + f2lf += log (ulen); + + /* integrate over each subinterval */ + + ans = 0.0; + + for (i = 1; i <= 50; i++) + { + otsum = 0.0; + + /* legendre quadrature with order = nlegq */ + /* nodes (stored in xlegq) are symmetric around zero. */ + + twa1 = (2 * i - 1) * ulen; + + for (jj = 1; jj <= nlegq; jj++) + { + if (ihalfq < jj) + { + j = jj - ihalfq - 1; + t1 = (f2lf + (f21 * log (twa1 + (xlegq[j] * ulen)))) + - (((xlegq[j] * ulen) + twa1) * ff4); + } + else + { + j = jj - 1; + t1 = (f2lf + (f21 * log (twa1 - (xlegq[j] * ulen)))) + + (((xlegq[j] * ulen) - twa1) * ff4); + + } + + /* if exp(t1) < 9e-14, then doesn't contribute to integral */ + if (t1 >= eps1) + { + if (ihalfq < jj) + { + qsqz = q * sqrt (((xlegq[j] * ulen) + twa1) * 0.5); + } + else + { + qsqz = q * sqrt (((-(xlegq[j] * ulen)) + twa1) * 0.5); + } + + /* call wprob to find integral of range portion */ + + wprb = wprob (qsqz, rr, cc); + rotsum = (wprb * alegq[j]) * exp (t1); + otsum += rotsum; + } + /* end legendre integral for interval i */ + /* L200: */ + } + + /* if integral for interval i < 1e-14, then stop. + * However, in order to avoid small area under left tail, + * at least 1 / ulen intervals are calculated. + */ + if (i * ulen >= 1.0 && otsum <= eps2) + break; + + /* end of interval i */ + /* L330: */ + + ans += otsum; + } + + assert (otsum <= eps2); /* not converged */ + + if (ans > 1.) + ans = 1.; + return R_DT_val (ans); +} + + + diff --git a/lib/tukey/qtukey.c b/lib/tukey/qtukey.c new file mode 100644 index 0000000000..08253df754 --- /dev/null +++ b/lib/tukey/qtukey.c @@ -0,0 +1,253 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* This file is taken from the R project source code, and modified. + The original copyright notice is reproduced below: */ + +/* + * Mathlib : A C Library of Special Functions + * Copyright (C) 1998 Ross Ihaka + * Copyright (C) 2000--2005 The R Development Core Team + * based in part on AS70 (C) 1974 Royal Statistical Society + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, a copy is available at + * http://www.r-project.org/Licenses/ + * + * SYNOPSIS + * + * #include + * double qtukey(p, rr, cc, df, lower_tail, log_p); + * + * DESCRIPTION + * + * Computes the quantiles of the maximum of rr studentized + * ranges, each based on cc means and with df degrees of freedom + * for the standard error, is less than q. + * + * The algorithm is based on that of the reference. + * + * REFERENCE + * + * Copenhaver, Margaret Diponzio & Holland, Burt S. + * Multiple comparisons of simple effects in + * the two-way analysis of variance with fixed effects. + * Journal of Statistical Computation and Simulation, + * Vol.30, pp.1-15, 1988. + */ + +#include + +#include "tukey.h" + +#include +#include + +#define TRUE (1) +#define FALSE (0) + +#define ML_POSINF (1.0 / 0.0) +#define ML_NEGINF (-1.0 / 0.0) + +#define R_D_Lval(p) (lower_tail ? (p) : (0.5 - (p) + 0.5)) /* p */ + +#define R_DT_qIv(p) (log_p ? (lower_tail ? exp(p) : - expm1(p)) \ + : R_D_Lval(p)) + + +static double fmax2(double x, double y) +{ +#ifdef IEEE_754 + if (ISNAN(x) || ISNAN(y)) + return x + y; +#endif + return (x < y) ? y : x; +} + + +#define R_Q_P01_boundaries(p, _LEFT_, _RIGHT_) \ + if (log_p) { \ + assert (p <= 0); \ + if(p == 0) /* upper bound*/ \ + return lower_tail ? _RIGHT_ : _LEFT_; \ + if(p == ML_NEGINF) \ + return lower_tail ? _LEFT_ : _RIGHT_; \ + } \ + else { /* !log_p */ \ + assert (p >= 0 && p <= 1); \ + if(p == 0) \ + return lower_tail ? _LEFT_ : _RIGHT_; \ + if(p == 1) \ + return lower_tail ? _RIGHT_ : _LEFT_; \ + } + + +/* qinv() : + * this function finds percentage point of the studentized range + * which is used as initial estimate for the secant method. + * function is adapted from portion of algorithm as 70 + * from applied statistics (1974) ,vol. 23, no. 1 + * by odeh, r. e. and evans, j. o. + * + * p = percentage point + * c = no. of columns or treatments + * v = degrees of freedom + * qinv = returned initial estimate + * + * vmax is cutoff above which degrees of freedom + * is treated as infinity. + */ + +static double qinv(double p, double c, double v) +{ + static const double p0 = 0.322232421088; + static const double q0 = 0.993484626060e-01; + static const double p1 = -1.0; + static const double q1 = 0.588581570495; + static const double p2 = -0.342242088547; + static const double q2 = 0.531103462366; + static const double p3 = -0.204231210125; + static const double q3 = 0.103537752850; + static const double p4 = -0.453642210148e-04; + static const double q4 = 0.38560700634e-02; + static const double c1 = 0.8832; + static const double c2 = 0.2368; + static const double c3 = 1.214; + static const double c4 = 1.208; + static const double c5 = 1.4142; + static const double vmax = 120.0; + + double ps, q, t, yi; + + ps = 0.5 - 0.5 * p; + yi = sqrt (log (1.0 / (ps * ps))); + t = yi + (((( yi * p4 + p3) * yi + p2) * yi + p1) * yi + p0) + / (((( yi * q4 + q3) * yi + q2) * yi + q1) * yi + q0); + if (v < vmax) t += (t * t * t + t) / v / 4.0; + q = c1 - c2 * t; + if (v < vmax) q += -c3 / v + c4 * t / v; + return t * (q * log (c - 1.0) + c5); +} + +/* + * Copenhaver, Margaret Diponzio & Holland, Burt S. + * Multiple comparisons of simple effects in + * the two-way analysis of variance with fixed effects. + * Journal of Statistical Computation and Simulation, + * Vol.30, pp.1-15, 1988. + * + * Uses the secant method to find critical values. + * + * p = confidence level (1 - alpha) + * rr = no. of rows or groups + * cc = no. of columns or treatments + * df = degrees of freedom of error term + * + * ir(1) = error flag = 1 if wprob probability > 1 + * ir(2) = error flag = 1 if ptukey probability > 1 + * ir(3) = error flag = 1 if convergence not reached in 50 iterations + * = 2 if df < 2 + * + * qtukey = returned critical value + * + * If the difference between successive iterates is less than eps, + * the search is terminated + */ + + +double qtukey(double p, double rr, double cc, double df, + int lower_tail, int log_p) +{ + static const double eps = 0.0001; + const int maxiter = 50; + + double ans = 0.0, valx0, valx1, x0, x1, xabs; + int iter; + +#ifdef IEEE_754 + if (ISNAN(p) || ISNAN(rr) || ISNAN(cc) || ISNAN(df)) { + ML_ERROR(ME_DOMAIN, "qtukey"); + return p + rr + cc + df; + } +#endif + + /* df must be > 1 ; there must be at least two values */ + assert (! (df < 2 || rr < 1 || cc < 2) ); + + R_Q_P01_boundaries (p, 0, ML_POSINF); + + p = R_DT_qIv(p); /* lower_tail,non-log "p" */ + + /* Initial value */ + + x0 = qinv(p, cc, df); + + /* Find prob(value < x0) */ + + valx0 = ptukey(x0, rr, cc, df, /*LOWER*/TRUE, /*LOG_P*/FALSE) - p; + + /* Find the second iterate and prob(value < x1). */ + /* If the first iterate has probability value */ + /* exceeding p then second iterate is 1 less than */ + /* first iterate; otherwise it is 1 greater. */ + + if (valx0 > 0.0) + x1 = fmax2(0.0, x0 - 1.0); + else + x1 = x0 + 1.0; + valx1 = ptukey(x1, rr, cc, df, /*LOWER*/TRUE, /*LOG_P*/FALSE) - p; + + /* Find new iterate */ + + for(iter=1 ; iter < maxiter ; iter++) { + ans = x1 - ((valx1 * (x1 - x0)) / (valx1 - valx0)); + valx0 = valx1; + + /* New iterate must be >= 0 */ + + x0 = x1; + if (ans < 0.0) { + ans = 0.0; + valx1 = -p; + } + /* Find prob(value < new iterate) */ + + valx1 = ptukey(ans, rr, cc, df, /*LOWER*/TRUE, /*LOG_P*/FALSE) - p; + x1 = ans; + + /* If the difference between two successive */ + /* iterates is less than eps, stop */ + + xabs = fabs(x1 - x0); + if (xabs < eps) + return ans; + } + + /* The process did not converge in 'maxiter' iterations */ + assert (0); + return ans; +} diff --git a/lib/tukey/tukey.h b/lib/tukey/tukey.h new file mode 100644 index 0000000000..69050e87b3 --- /dev/null +++ b/lib/tukey/tukey.h @@ -0,0 +1,25 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +#ifndef TUKEY_H +#define TUKEY_H 1 + +double qtukey(double p, double rr, double cc, double df, int lower_tail, int log_p); + +double ptukey (double q, double rr, double cc, double df, int lower_tail, int log_p); + +#endif diff --git a/src/language/stats/oneway.c b/src/language/stats/oneway.c index 8e2805b3b1..bdac117c7c 100644 --- a/src/language/stats/oneway.c +++ b/src/language/stats/oneway.c @@ -37,6 +37,7 @@ #include "libpspp/misc.h" #include "libpspp/taint.h" #include "linreg/sweep.h" +#include "tukey/tukey.h" #include "math/categoricals.h" #include "math/covariance.h" #include "math/levene.h" @@ -45,7 +46,35 @@ #include "gettext.h" #define _(msgid) gettext (msgid) +#define N_(msgid) msgid +/* Workspace variable for each dependent variable */ +struct per_var_ws +{ + struct categoricals *cat; + struct covariance *cov; + struct levene *nl; + + double n; + + double sst; + double sse; + double ssa; + + int n_groups; + + double mse; +}; + +/* Per category data */ +struct descriptive_data +{ + const struct variable *var; + struct moments1 *mom; + + double minimum; + double maximum; +}; enum missing_type { @@ -70,8 +99,28 @@ struct contrasts_node { struct ll ll; struct ll_list coefficient_list; +}; - bool bad_count; /* True if the number of coefficients does not equal the number of groups */ + +struct oneway_spec; + +typedef double df_func (const struct per_var_ws *pvw, const struct moments1 *mom_i, const struct moments1 *mom_j); +typedef double ts_func (int k, const struct moments1 *mom_i, const struct moments1 *mom_j, double std_err); +typedef double p1tail_func (double ts, double df1, double df2); + +typedef double pinv_func (double std_err, double alpha, double df, int k, const struct moments1 *mom_i, const struct moments1 *mom_j); + + +struct posthoc +{ + const char *syntax; + const char *label; + + df_func *dff; + ts_func *tsf; + p1tail_func *p1f; + + pinv_func *pinv; }; struct oneway_spec @@ -91,33 +140,206 @@ struct oneway_spec /* The weight variable */ const struct variable *wv; + + /* The confidence level for multiple comparisons */ + double alpha; + + int *posthoc; + int n_posthoc; }; -/* Per category data */ -struct descriptive_data +static double +df_common (const struct per_var_ws *pvw, const struct moments1 *mom_i UNUSED, const struct moments1 *mom_j UNUSED) { - const struct variable *var; - struct moments1 *mom; + return pvw->n - pvw->n_groups; +} - double minimum; - double maximum; -}; +static double +df_individual (const struct per_var_ws *pvw UNUSED, const struct moments1 *mom_i, const struct moments1 *mom_j) +{ + double n_i, var_i; + double n_j, var_j; + double nom,denom; -/* Workspace variable for each dependent variable */ -struct per_var_ws + moments1_calculate (mom_i, &n_i, NULL, &var_i, 0, 0); + moments1_calculate (mom_j, &n_j, NULL, &var_j, 0, 0); + + nom = pow2 (var_i/n_i + var_j/n_j); + denom = pow2 (var_i/n_i) / (n_i - 1) + pow2 (var_j/n_j) / (n_j - 1); + + return nom / denom; +} + +static double lsd_pinv (double std_err, double alpha, double df, int k UNUSED, const struct moments1 *mom_i UNUSED, const struct moments1 *mom_j UNUSED) { - struct categoricals *cat; - struct covariance *cov; - struct levene *nl; + return std_err * gsl_cdf_tdist_Pinv (1.0 - alpha / 2.0, df); +} - double sst; - double sse; - double ssa; +static double bonferroni_pinv (double std_err, double alpha, double df, int k, const struct moments1 *mom_i UNUSED, const struct moments1 *mom_j UNUSED) +{ + const int m = k * (k - 1) / 2; + return std_err * gsl_cdf_tdist_Pinv (1.0 - alpha / (2.0 * m), df); +} - int n_groups; +static double sidak_pinv (double std_err, double alpha, double df, int k, const struct moments1 *mom_i, const struct moments1 *mom_j) +{ + const double m = k * (k - 1) / 2; + double lp = 1.0 - exp (log (1.0 - alpha) / m ) ; + return std_err * gsl_cdf_tdist_Pinv (1.0 - lp / 2.0, df); +} + +static double tukey_pinv (double std_err, double alpha, double df, int k, const struct moments1 *mom_i UNUSED, const struct moments1 *mom_j UNUSED) +{ + return std_err / sqrt (2.0) * qtukey (1 - alpha, 1.0, k, df, 1, 0); +} + +static double scheffe_pinv (double std_err, double alpha, double df, int k, const struct moments1 *mom_i UNUSED, const struct moments1 *mom_j UNUSED) +{ + double x = (k - 1) * gsl_cdf_fdist_Pinv (1.0 - alpha, k - 1, df); + return std_err * sqrt (x); +} + +static double gh_pinv (double std_err UNUSED, double alpha, double df, int k, const struct moments1 *mom_i, const struct moments1 *mom_j) +{ + double n_i, mean_i, var_i; + double n_j, mean_j, var_j; + double m; + + moments1_calculate (mom_i, &n_i, &mean_i, &var_i, 0, 0); + moments1_calculate (mom_j, &n_j, &mean_j, &var_j, 0, 0); + + m = sqrt ((var_i/n_i + var_j/n_j) / 2.0); + + return m * qtukey (1 - alpha, 1.0, k, df, 1, 0); +} + + +static double +multiple_comparison_sig (double std_err, + const struct per_var_ws *pvw, + const struct descriptive_data *dd_i, const struct descriptive_data *dd_j, + const struct posthoc *ph) +{ + int k = pvw->n_groups; + double df = ph->dff (pvw, dd_i->mom, dd_j->mom); + double ts = ph->tsf (k, dd_i->mom, dd_j->mom, std_err); + return ph->p1f (ts, k - 1, df); +} + +static double +mc_half_range (const struct oneway_spec *cmd, const struct per_var_ws *pvw, double std_err, const struct descriptive_data *dd_i, const struct descriptive_data *dd_j, const struct posthoc *ph) +{ + int k = pvw->n_groups; + double df = ph->dff (pvw, dd_i->mom, dd_j->mom); + + return ph->pinv (std_err, cmd->alpha, df, k, dd_i->mom, dd_j->mom); +} + +static double tukey_1tailsig (double ts, double df1, double df2) +{ + double twotailedsig = 1.0 - ptukey (ts, 1.0, df1 + 1, df2, 1, 0); + + return twotailedsig / 2.0; +} + +static double lsd_1tailsig (double ts, double df1 UNUSED, double df2) +{ + return ts < 0 ? gsl_cdf_tdist_P (ts, df2) : gsl_cdf_tdist_Q (ts, df2); +} + +static double sidak_1tailsig (double ts, double df1, double df2) +{ + double ex = (df1 + 1.0) * df1 / 2.0; + double lsd_sig = 2 * lsd_1tailsig (ts, df1, df2); + + return 0.5 * (1.0 - pow (1.0 - lsd_sig, ex)); +} + +static double bonferroni_1tailsig (double ts, double df1, double df2) +{ + const int m = (df1 + 1) * df1 / 2; + + double p = ts < 0 ? gsl_cdf_tdist_P (ts, df2) : gsl_cdf_tdist_Q (ts, df2); + p *= m; + + return p > 0.5 ? 0.5 : p; +} + +static double scheffe_1tailsig (double ts, double df1, double df2) +{ + return 0.5 * gsl_cdf_fdist_Q (ts, df1, df2); +} + + +static double tukey_test_stat (int k, const struct moments1 *mom_i, const struct moments1 *mom_j, double std_err) +{ + double n_i, mean_i, var_i; + double n_j, mean_j, var_j; + + moments1_calculate (mom_i, &n_i, &mean_i, &var_i, 0, 0); + moments1_calculate (mom_j, &n_j, &mean_j, &var_j, 0, 0); + + double ts = (mean_i - mean_j) / std_err; + ts = fabs (ts) * sqrt (2.0); + + return ts; +} + +static double lsd_test_stat (int k, const struct moments1 *mom_i, const struct moments1 *mom_j, double std_err) +{ + double n_i, mean_i, var_i; + double n_j, mean_j, var_j; + + moments1_calculate (mom_i, &n_i, &mean_i, &var_i, 0, 0); + moments1_calculate (mom_j, &n_j, &mean_j, &var_j, 0, 0); + + return (mean_i - mean_j) / std_err; +} + +static double scheffe_test_stat (int k, const struct moments1 *mom_i, const struct moments1 *mom_j, double std_err) +{ + double n_i, mean_i, var_i; + double n_j, mean_j, var_j; + + moments1_calculate (mom_i, &n_i, &mean_i, &var_i, 0, 0); + moments1_calculate (mom_j, &n_j, &mean_j, &var_j, 0, 0); + + double t = (mean_i - mean_j) / std_err; + t = pow2 (t); + t /= k - 1; + + return t; +} + +static double gh_test_stat (int k, const struct moments1 *mom_i, const struct moments1 *mom_j, double std_err) +{ + double n_i, mean_i, var_i; + double n_j, mean_j, var_j; + + moments1_calculate (mom_i, &n_i, &mean_i, &var_i, 0, 0); + moments1_calculate (mom_j, &n_j, &mean_j, &var_j, 0, 0); + + double thing = var_i / n_i + var_j / n_j; + thing /= 2.0; + thing = sqrt (thing); + + double ts = (mean_i - mean_j) / thing; + + return fabs (ts); +} + + + +static const struct posthoc ph_tests [] = + { + { "LSD", N_("LSD"), df_common, lsd_test_stat, lsd_1tailsig, lsd_pinv}, + { "TUKEY", N_("Tukey HSD"), df_common, tukey_test_stat, tukey_1tailsig, tukey_pinv}, + { "BONFERRONI", N_("Bonferroni"), df_common, lsd_test_stat, bonferroni_1tailsig, bonferroni_pinv}, + { "SCHEFFE", N_("Scheffé"), df_common, scheffe_test_stat, scheffe_1tailsig, scheffe_pinv}, + { "GH", N_("Games-Howell"), df_individual, gh_test_stat, tukey_1tailsig, gh_pinv}, + { "SIDAK", N_("Šidák"), df_common, lsd_test_stat, sidak_1tailsig, sidak_pinv} + }; - double mse; -}; struct oneway_workspace { @@ -151,6 +373,9 @@ cmd_oneway (struct lexer *lexer, struct dataset *ds) oneway.missing_type = MISS_ANALYSIS; oneway.exclude = MV_ANY; oneway.wv = dict_get_weight (dict); + oneway.alpha = 0.05; + oneway.posthoc = NULL; + oneway.n_posthoc = 0; ll_init (&oneway.contrast_list); @@ -197,6 +422,45 @@ cmd_oneway (struct lexer *lexer, struct dataset *ds) } } } + else if (lex_match_id (lexer, "POSTHOC")) + { + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) + { + int p; + bool method = false; + for (p = 0 ; p < sizeof (ph_tests) / sizeof (struct posthoc); ++p) + { + if (lex_match_id (lexer, ph_tests[p].syntax)) + { + oneway.n_posthoc++; + oneway.posthoc = xrealloc (oneway.posthoc, sizeof (*oneway.posthoc) * oneway.n_posthoc); + oneway.posthoc[oneway.n_posthoc - 1] = p; + method = true; + break; + } + } + if ( method == false) + { + if (lex_match_id (lexer, "ALPHA")) + { + if ( !lex_force_match (lexer, T_LPAREN)) + goto error; + lex_force_num (lexer); + oneway.alpha = lex_number (lexer); + lex_get (lexer); + if ( !lex_force_match (lexer, T_RPAREN)) + goto error; + } + else + { + msg (SE, _("The post hoc analysis method %s is not supported."), lex_tokcstr (lexer)); + lex_error (lexer, NULL); + goto error; + } + } + } + } else if (lex_match_id (lexer, "CONTRAST")) { struct contrasts_node *cl = xzalloc (sizeof *cl); @@ -490,8 +754,7 @@ run_oneway (const struct oneway_spec *cmd, gsl_matrix *cm = covariance_calculate_unnormalized (pvw->cov); const struct categoricals *cats = covariance_get_categoricals (pvw->cov); - double n; - moments1_calculate (ws.dd_total[v]->mom, &n, NULL, NULL, NULL, NULL); + moments1_calculate (ws.dd_total[v]->mom, &pvw->n, NULL, NULL, NULL, NULL); pvw->sst = gsl_matrix_get (cm, 0, 0); @@ -503,7 +766,7 @@ run_oneway (const struct oneway_spec *cmd, pvw->n_groups = categoricals_total (cats); - pvw->mse = (pvw->sst - pvw->ssa) / (n - pvw->n_groups); + pvw->mse = (pvw->sst - pvw->ssa) / (pvw->n - pvw->n_groups); gsl_matrix_free (cm); } @@ -538,6 +801,7 @@ run_oneway (const struct oneway_spec *cmd, static void show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspace *ws); static void show_contrast_tests (const struct oneway_spec *cmd, const struct oneway_workspace *ws); +static void show_comparisons (const struct oneway_spec *cmd, const struct oneway_workspace *ws, int depvar); static void output_oneway (const struct oneway_spec *cmd, struct oneway_workspace *ws) @@ -546,7 +810,8 @@ output_oneway (const struct oneway_spec *cmd, struct oneway_workspace *ws) /* Check the sanity of the given contrast values */ struct contrasts_node *coeff_list = NULL; - ll_for_each (coeff_list, struct contrasts_node, ll, &cmd->contrast_list) + struct contrasts_node *coeff_next = NULL; + ll_for_each_safe (coeff_list, coeff_next, struct contrasts_node, ll, &cmd->contrast_list) { struct coeff_node *cn = NULL; double sum = 0; @@ -556,8 +821,10 @@ output_oneway (const struct oneway_spec *cmd, struct oneway_workspace *ws) if (ll_count (cl) != ws->actual_number_of_groups) { msg (SW, - _("Number of contrast coefficients must equal the number of groups")); - coeff_list->bad_count = true; + _("In contrast list %zu, the number of coefficients (%d) does not equal the number of groups (%d). This contrast list will be ignored."), + i, ll_count (cl), ws->actual_number_of_groups); + + ll_remove (&coeff_list->ll); continue; } @@ -581,6 +848,13 @@ output_oneway (const struct oneway_spec *cmd, struct oneway_workspace *ws) show_contrast_coeffs (cmd, ws); show_contrast_tests (cmd, ws); } + + if ( cmd->posthoc ) + { + int v; + for (v = 0 ; v < cmd->n_vars; ++v) + show_comparisons (cmd, ws, v); + } } @@ -954,6 +1228,7 @@ show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspa { const struct categoricals *cats = covariance_get_categoricals (cov); const union value *val = categoricals_get_value_by_category (cats, count); + struct coeff_node *coeffn = ll_data (coeffi, struct coeff_node, ll); struct string vstr; ds_init_empty (&vstr); @@ -964,14 +1239,7 @@ show_contrast_coeffs (const struct oneway_spec *cmd, const struct oneway_workspa ds_destroy (&vstr); - if (cn->bad_count) - tab_text (t, count + 2, c_num + 2, TAB_RIGHT, "?" ); - else - { - struct coeff_node *coeffn = ll_data (coeffi, struct coeff_node, ll); - - tab_text_format (t, count + 2, c_num + 2, TAB_RIGHT, "%g", coeffn->coeff); - } + tab_text_format (t, count + 2, c_num + 2, TAB_RIGHT, "%g", coeffn->coeff); } ++c_num; } @@ -1082,9 +1350,6 @@ show_contrast_tests (const struct oneway_spec *cmd, const struct oneway_workspac (v * lines_per_variable) + i + 1 + n_contrasts, TAB_CENTER | TAT_TITLE, "%d", i + 1); - if (cn->bad_count) - continue; - for (coeffi = ll_head (&cn->coefficient_list); coeffi != ll_null (&cn->coefficient_list); ++ci, coeffi = ll_next (coeffi)) @@ -1182,3 +1447,124 @@ show_contrast_tests (const struct oneway_spec *cmd, const struct oneway_workspac } + +static void +show_comparisons (const struct oneway_spec *cmd, const struct oneway_workspace *ws, int v) +{ + const int n_cols = 8; + const int heading_rows = 2; + const int heading_cols = 3; + + int p; + int r = heading_rows ; + + const struct per_var_ws *pvw = &ws->vws[v]; + const struct categoricals *cat = pvw->cat; + const int n_rows = heading_rows + cmd->n_posthoc * pvw->n_groups * (pvw->n_groups - 1); + + struct tab_table *t = tab_create (n_cols, n_rows); + + tab_headers (t, heading_cols, 0, heading_rows, 0); + + /* Put a frame around the entire box, and vertical lines inside */ + tab_box (t, + TAL_2, TAL_2, + -1, -1, + 0, 0, + n_cols - 1, n_rows - 1); + + tab_box (t, + -1, -1, + -1, TAL_1, + heading_cols, 0, + n_cols - 1, n_rows - 1); + + tab_vline (t, TAL_2, heading_cols, 0, n_rows - 1); + + tab_title (t, _("Multiple Comparisons")); + + tab_text_format (t, 1, 1, TAB_LEFT | TAT_TITLE, _("(I) %s"), var_to_string (cmd->indep_var)); + tab_text_format (t, 2, 1, TAB_LEFT | TAT_TITLE, _("(J) %s"), var_to_string (cmd->indep_var)); + tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean Difference")); + tab_text (t, 3, 1, TAB_CENTER | TAT_TITLE, _("(I - J)")); + tab_text (t, 4, 1, TAB_CENTER | TAT_TITLE, _("Std. Error")); + tab_text (t, 5, 1, TAB_CENTER | TAT_TITLE, _("Sig.")); + + tab_joint_text_format (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE, + _("%g%% Confidence Interval"), + (1 - cmd->alpha) * 100.0); + + tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound")); + tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound")); + + + for (p = 0; p < cmd->n_posthoc; ++p) + { + int i; + const struct posthoc *ph = &ph_tests[cmd->posthoc[p]]; + + tab_hline (t, TAL_2, 0, n_cols - 1, r); + + tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, gettext (ph->label)); + + for (i = 0; i < pvw->n_groups ; ++i) + { + double weight_i, mean_i, var_i; + int rx = 0; + struct string vstr; + int j; + struct descriptive_data *dd_i = categoricals_get_user_data_by_category (cat, i); + const union value *gval = categoricals_get_value_by_category (cat, i); + + ds_init_empty (&vstr); + var_append_value_name (cmd->indep_var, gval, &vstr); + + if ( i != 0) + tab_hline (t, TAL_1, 1, n_cols - 1, r); + tab_text (t, 1, r, TAB_LEFT | TAT_TITLE, ds_cstr (&vstr)); + + moments1_calculate (dd_i->mom, &weight_i, &mean_i, &var_i, 0, 0); + + for (j = 0 ; j < pvw->n_groups; ++j) + { + double weight_j, mean_j, var_j; + double half_range; + struct descriptive_data *dd_j = categoricals_get_user_data_by_category (cat, j); + if (j == i) + continue; + + ds_clear (&vstr); + gval = categoricals_get_value_by_category (cat, j); + var_append_value_name (cmd->indep_var, gval, &vstr); + tab_text (t, 2, r + rx, TAB_LEFT | TAT_TITLE, ds_cstr (&vstr)); + + moments1_calculate (dd_j->mom, &weight_j, &mean_j, &var_j, 0, 0); + + tab_double (t, 3, r + rx, 0, mean_i - mean_j, 0); + + double std_err = pvw->mse; + std_err *= weight_i + weight_j; + std_err /= weight_i * weight_j; + std_err = sqrt (std_err); + + tab_double (t, 4, r + rx, 0, std_err, 0); + + tab_double (t, 5, r + rx, 0, 2 * multiple_comparison_sig (std_err, pvw, dd_i, dd_j, ph), 0); + + half_range = mc_half_range (cmd, pvw, std_err, dd_i, dd_j, ph); + + tab_double (t, 6, r + rx, 0, + (mean_i - mean_j) - half_range, 0 ); + + tab_double (t, 7, r + rx, 0, + (mean_i - mean_j) + half_range, 0 ); + + rx++; + } + ds_destroy (&vstr); + r += pvw->n_groups - 1; + } + } + + tab_submit (t); +} diff --git a/src/math/automake.mk b/src/math/automake.mk index b29d9baad1..cdcc2c8cb7 100644 --- a/src/math/automake.mk +++ b/src/math/automake.mk @@ -4,7 +4,8 @@ noinst_LTLIBRARIES += src/math/libpspp-math.la src_math_libpspp_math_la_LIBADD = \ - lib/linreg/liblinreg.la + lib/linreg/liblinreg.la \ + lib/tukey/libtukey.la src_math_libpspp_math_la_SOURCES = \ src/math/chart-geometry.c \ diff --git a/tests/language/stats/oneway.at b/tests/language/stats/oneway.at index a0a3bafb74..ce121beb97 100644 --- a/tests/language/stats/oneway.at +++ b/tests/language/stats/oneway.at @@ -546,8 +546,358 @@ ONEWAY AT_CHECK([pspp -o pspp-weighted.csv oneway-weighted.sps], [0], [ignore], [ignore]) - AT_CHECK([diff pspp-weighted.csv pspp-unweighted.csv], [0]) +AT_CLEANUP + + + +AT_SETUP([ONEWAY posthoc LSD and BONFERRONI]) +AT_DATA([oneway-pig.sps],[dnl +SET FORMAT F12.3. +data list notable list /pigmentation * family *. +begin data. +36 1 +39 1 +43 1 +38 1 +37 1 +46 2 +47 2 +47 2 +47 2 +43 2 +40 3 +50 3 +44 3 +48 3 +50 3 +45 4 +53 4 +56 4 +52 4 +56 4 +end data. + + +oneway pigmentation by family + /statistics = descriptives + /posthoc = lsd bonferroni alpha (0.05) + . +]) + +AT_CHECK([pspp -O format=csv oneway-pig.sps], [0], +[Table: Descriptives +,,,,,,95% Confidence Interval for Mean,,, +,,N,Mean,Std. Deviation,Std. Error,Lower Bound,Upper Bound,Minimum,Maximum +pigmentation,1.000,5,38.600,2.702,1.208,35.245,41.955,36.000,43.000 +,2.000,5,46.000,1.732,.775,43.849,48.151,43.000,47.000 +,3.000,5,46.400,4.336,1.939,41.016,51.784,40.000,50.000 +,4.000,5,52.400,4.506,2.015,46.806,57.994,45.000,56.000 +,Total,20,45.850,5.967,1.334,43.057,48.643,36.000,56.000 + +Table: ANOVA +,,Sum of Squares,df,Mean Square,F,Significance +pigmentation,Between Groups,478.950,3,159.650,12.927,.000 +,Within Groups,197.600,16,12.350,, +,Total,676.550,19,,, + +Table: Multiple Comparisons +,,,Mean Difference,,,95% Confidence Interval, +,(I) family,(J) family,(I - J),Std. Error,Sig.,Lower Bound,Upper Bound +LSD,1.000,2.000,-7.400,2.223,.004,-12.112,-2.688 +,,3.000,-7.800,2.223,.003,-12.512,-3.088 +,,4.000,-13.800,2.223,.000,-18.512,-9.088 +,2.000,1.000,7.400,2.223,.004,2.688,12.112 +,,3.000,-.400,2.223,.859,-5.112,4.312 +,,4.000,-6.400,2.223,.011,-11.112,-1.688 +,3.000,1.000,7.800,2.223,.003,3.088,12.512 +,,2.000,.400,2.223,.859,-4.312,5.112 +,,4.000,-6.000,2.223,.016,-10.712,-1.288 +,4.000,1.000,13.800,2.223,.000,9.088,18.512 +,,2.000,6.400,2.223,.011,1.688,11.112 +,,3.000,6.000,2.223,.016,1.288,10.712 +Bonferroni,1.000,2.000,-7.400,2.223,.025,-14.086,-.714 +,,3.000,-7.800,2.223,.017,-14.486,-1.114 +,,4.000,-13.800,2.223,.000,-20.486,-7.114 +,2.000,1.000,7.400,2.223,.025,.714,14.086 +,,3.000,-.400,2.223,1.000,-7.086,6.286 +,,4.000,-6.400,2.223,.065,-13.086,.286 +,3.000,1.000,7.800,2.223,.017,1.114,14.486 +,,2.000,.400,2.223,1.000,-6.286,7.086 +,,4.000,-6.000,2.223,.095,-12.686,.686 +,4.000,1.000,13.800,2.223,.000,7.114,20.486 +,,2.000,6.400,2.223,.065,-.286,13.086 +,,3.000,6.000,2.223,.095,-.686,12.686 +]) + +AT_CLEANUP + + +AT_SETUP([ONEWAY posthoc Tukey HSD and Games-Howell]) +AT_DATA([oneway-tukey.sps],[dnl +set format = f11.3. +data list notable list /libido * dose *. +begin data. +3 0 +2 0 +1 0 +1 0 +4 0 +5 1 +2 1 +4 1 +2 1 +3 1 +7 2 +4 2 +5 2 +3 2 +6 2 +end data. + +variable label dose 'Dose of Viagra'. + +add value labels dose 0 'Placebo' 1 '1 Dose' 2 '2 Doses'. + +oneway libido by dose + /posthoc tukey gh. +]) + +AT_CHECK([pspp -O format=csv oneway-tukey.sps], [0], +[Table: ANOVA +,,Sum of Squares,df,Mean Square,F,Significance +libido,Between Groups,20.133,2,10.067,5.119,.025 +,Within Groups,23.600,12,1.967,, +,Total,43.733,14,,, + +Table: Multiple Comparisons +,,,Mean Difference,,,95% Confidence Interval, +,(I) Dose of Viagra,(J) Dose of Viagra,(I - J),Std. Error,Sig.,Lower Bound,Upper Bound +Tukey HSD,Placebo,1 Dose,-1.000,.887,.516,-3.366,1.366 +,,2 Doses,-2.800,.887,.021,-5.166,-.434 +,1 Dose,Placebo,1.000,.887,.516,-1.366,3.366 +,,2 Doses,-1.800,.887,.147,-4.166,.566 +,2 Doses,Placebo,2.800,.887,.021,.434,5.166 +,,1 Dose,1.800,.887,.147,-.566,4.166 +Games-Howell,Placebo,1 Dose,-1.000,.887,.479,-3.356,1.356 +,,2 Doses,-2.800,.887,.039,-5.439,-.161 +,1 Dose,Placebo,1.000,.887,.479,-1.356,3.356 +,,2 Doses,-1.800,.887,.185,-4.439,.839 +,2 Doses,Placebo,2.800,.887,.039,.161,5.439 +,,1 Dose,1.800,.887,.185,-.839,4.439 +]) AT_CLEANUP + +AT_SETUP([ONEWAY posthoc Sidak]) +AT_DATA([oneway-sidak.sps],[dnl +SET FORMAT F20.4. + +DATA LIST notable LIST /program score. +BEGIN DATA. +1 9 +1 12 +1 14 +1 11 +1 13 +2 10 +2 6 +2 9 +2 9 +2 10 +3 12 +3 14 +3 11 +3 13 +3 11 +4 9 +4 8 +4 11 +4 7 +4 8 +END DATA. + +ONEWAY + score BY program + /MISSING ANALYSIS + /POSTHOC = SIDAK. +]) + +AT_CHECK([pspp -O format=csv oneway-sidak.sps], [0], +[Table: ANOVA +,,Sum of Squares,df,Mean Square,F,Significance +score,Between Groups,54.9500,3,18.3167,7.0449,.0031 +,Within Groups,41.6000,16,2.6000,, +,Total,96.5500,19,,, + +Table: Multiple Comparisons +,,,Mean Difference,,,95% Confidence Interval, +,(I) program,(J) program,(I - J),Std. Error,Sig.,Lower Bound,Upper Bound +Šidák,1.0000,2.0000,3.0000,1.0198,.0561,-.0575,6.0575 +,,3.0000,-.4000,1.0198,.9993,-3.4575,2.6575 +,,4.0000,3.2000,1.0198,.0375,.1425,6.2575 +,2.0000,1.0000,-3.0000,1.0198,.0561,-6.0575,.0575 +,,3.0000,-3.4000,1.0198,.0250,-6.4575,-.3425 +,,4.0000,.2000,1.0198,1.0000,-2.8575,3.2575 +,3.0000,1.0000,.4000,1.0198,.9993,-2.6575,3.4575 +,,2.0000,3.4000,1.0198,.0250,.3425,6.4575 +,,4.0000,3.6000,1.0198,.0166,.5425,6.6575 +,4.0000,1.0000,-3.2000,1.0198,.0375,-6.2575,-.1425 +,,2.0000,-.2000,1.0198,1.0000,-3.2575,2.8575 +,,3.0000,-3.6000,1.0198,.0166,-6.6575,-.5425 +]) + +AT_CLEANUP + +AT_SETUP([ONEWAY posthoc Scheffe]) +AT_DATA([oneway-scheffe.sps],[dnl +set format = f11.3. +data list notable list /usage * group *. +begin data. +21.00 1 +19.00 1 +18.00 1 +25.00 1 +14.00 1 +13.00 1 +24.00 1 +19.00 1 +20.00 1 +21.00 1 +15.00 2 +10.00 2 +13.00 2 +16.00 2 +14.00 2 +24.00 2 +16.00 2 +14.00 2 +18.00 2 +16.00 2 +10.00 3 + 7.00 3 +13.00 3 +20.00 3 + .00 3 + 8.00 3 + 6.00 3 + 1.00 3 +12.00 3 +14.00 3 +18.00 4 +15.00 4 + 3.00 4 +27.00 4 + 6.00 4 +14.00 4 +13.00 4 +11.00 4 + 9.00 4 +18.00 4 +end data. + +variable label usage 'Days of Use'. + +add value labels group 0 'none' 1 'one' 2 'two' 3 'three' 4 'four'. + +oneway usage by group + /posthoc scheffe. +]) + +AT_CHECK([pspp -O format=csv oneway-scheffe.sps], [0], +[Table: ANOVA +,,Sum of Squares,df,Mean Square,F,Significance +Days of Use,Between Groups,555.275,3,185.092,6.663,.001 +,Within Groups,1000.100,36,27.781,, +,Total,1555.375,39,,, + +Table: Multiple Comparisons +,,,Mean Difference,,,95% Confidence Interval, +,(I) group,(J) group,(I - J),Std. Error,Sig.,Lower Bound,Upper Bound +Scheffé,one,two,3.800,2.357,.467,-3.112,10.712 +,,three,10.300,2.357,.001,3.388,17.212 +,,four,6.000,2.357,.110,-.912,12.912 +,two,one,-3.800,2.357,.467,-10.712,3.112 +,,three,6.500,2.357,.072,-.412,13.412 +,,four,2.200,2.357,.832,-4.712,9.112 +,three,one,-10.300,2.357,.001,-17.212,-3.388 +,,two,-6.500,2.357,.072,-13.412,.412 +,,four,-4.300,2.357,.358,-11.212,2.612 +,four,one,-6.000,2.357,.110,-12.912,.912 +,,two,-2.200,2.357,.832,-9.112,4.712 +,,three,4.300,2.357,.358,-2.612,11.212 +]) + +AT_CLEANUP + + +AT_SETUP([ONEWAY bad contrast count]) + +AT_DATA([oneway-bad-contrast.sps],[dnl +DATA LIST NOTABLE LIST /height * weight * temperature * sex *. +BEGIN DATA. +1884 88.6 39.97 0 +1801 90.9 39.03 0 +1801 91.7 38.98 0 +1607 56.3 36.26 1 +1608 46.3 46.26 1 +1607 55.9 37.84 1 +1604 56.6 36.81 1 +1606 56.1 34.56 1 +END DATA. + +ONEWAY /VARIABLES= height weight temperature BY sex + /CONTRAST = -1 1 + /CONTRAST = -3 3 + /CONTRAST = 2 -2 1 + /CONTRAST = -9 9 + . +]) + + +AT_CHECK([pspp -O format=csv oneway-bad-contrast.sps], [0], [dnl +"oneway-bad-contrast.sps:18: warning: ONEWAY: In contrast list 3, the number of coefficients (3) does not equal the number of groups (2). This contrast list will be ignored." + +Table: ANOVA +,,Sum of Squares,df,Mean Square,F,Significance +height,Between Groups,92629.63,1,92629.63,120.77,.00 +,Within Groups,4601.87,6,766.98,, +,Total,97231.50,7,,, +weight,Between Groups,2451.65,1,2451.65,174.59,.00 +,Within Groups,84.25,6,14.04,, +,Total,2535.90,7,,, +temperature,Between Groups,1.80,1,1.80,.13,.73 +,Within Groups,84.55,6,14.09,, +,Total,86.36,7,,, + +Table: Contrast Coefficients +,,sex, +,,.00,1.00 +Contrast,1,-1,1 +,2,-3,3 +,3,-9,9 + +Table: Contrast Tests +,,Contrast,Value of Contrast,Std. Error,t,df,Sig. (2-tailed) +height,Assume equal variances,1,-222.27,20.23,10.99,6,.00 +,,2,-666.80,60.68,10.99,6,.00 +,,3,-2000.40,182.03,10.99,6,.00 +,Does not assume equal,1,-222.27,27.67,-8.03,2.00,1.98 +,,2,-666.80,83.02,-8.03,2.00,1.98 +,,3,-2000.40,249.07,-8.03,2.00,1.98 +weight,Assume equal variances,1,-36.16,2.74,13.21,6,.00 +,,2,-108.48,8.21,13.21,6,.00 +,,3,-325.44,24.63,13.21,6,.00 +,Does not assume equal,1,-36.16,2.19,-16.48,5.42,2.00 +,,2,-108.48,6.58,-16.48,5.42,2.00 +,,3,-325.44,19.75,-16.48,5.42,2.00 +temperature,Assume equal variances,1,-.98,2.74,.36,6,.73 +,,2,-2.94,8.22,.36,6,.73 +,,3,-8.83,24.67,.36,6,.73 +,Does not assume equal,1,-.98,2.07,-.47,4.19,1.34 +,,2,-2.94,6.22,-.47,4.19,1.34 +,,3,-8.83,18.66,-.47,4.19,1.34 +]) + +AT_CLEANUP \ No newline at end of file -- 2.30.2