1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2019 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "math/shapiro-wilk.h"
23 #include <gsl/gsl_cdf.h>
24 #include "libpspp/cast.h"
25 #include "libpspp/compiler.h"
26 #include "libpspp/message.h"
29 #define N_(msgid) msgid
31 /* Return the sum of coeff[i] * x^i for all i in the range [0,order).
32 It is the caller's responsibility to ensure that coeff points to a
33 large enough array containing the desired coefficients. */
35 polynomial (const double *coeff, int order, double x)
38 for (int i = 0; i < order; ++i)
39 result += coeff[i] * pow (x, i);
45 m_i (struct shapiro_wilk *sw, int i)
49 double x = (i - 0.375) / (sw->n + 0.25);
51 return gsl_cdf_ugaussian_Pinv (x);
55 a_i (struct shapiro_wilk *sw, int i)
61 return -a_i (sw, sw->n - i + 1);
64 else if (i == sw->n - 1)
67 return m_i (sw, i) / sqrt (sw->epsilon);
73 acc (struct statistic *s, const struct ccase *cx UNUSED, double c,
76 struct shapiro_wilk *sw = UP_CAST (s, struct shapiro_wilk, parent.parent);
78 double int_part, frac_part;
79 frac_part = modf (c, &int_part);
81 if (frac_part != 0 && !sw->warned)
83 msg (MW, N_ ("One or more weight values are non-integer."
84 " Fractional parts will be ignored when calculating the Shapiro-Wilk statistic."));
88 for (int i = 0; i < int_part; ++i)
90 double a_ii = a_i (sw, cc - c + i + 1);
93 sw->numerator += a_ii * x_ii;
94 sw->denominator += pow (x_ii - sw->mean, 2);
99 destroy (struct statistic *s)
101 struct shapiro_wilk *sw = UP_CAST (s, struct shapiro_wilk, parent.parent);
108 shapiro_wilk_calculate (const struct shapiro_wilk *sw)
110 return sw->numerator * sw->numerator / sw->denominator;
113 /* Inititialse a SW ready for calculating the statistic for
114 a dataset of size N. */
115 struct shapiro_wilk *
116 shapiro_wilk_create (int n, double mean)
118 if (n < 3 || n > 5000)
121 struct shapiro_wilk *sw = XZALLOC (struct shapiro_wilk);
122 struct order_stats *os = &sw->parent;
123 struct statistic *stat = &os->parent;
125 const double c1[] = {0, 0.221157, -0.147981,
126 -2.071190, 4.434685, -2.706056};
128 const double c2[] = {0, 0.042981, -0.293762,
129 -1.752461, 5.682633, -3.582633};
133 const double u = 1.0 / sqrt (sw->n);
136 for (int i = 1; i <= sw->n; ++i)
138 double m_ii = m_i (sw, i);
142 double m_n1 = m_i (sw, sw->n);
143 double m_n2 = m_i (sw, sw->n - 1);
145 sw->a_n1 = polynomial (c1, 6, u);
146 sw->a_n1 += m_n1 / sqrt (m);
148 sw->a_n2 = polynomial (c2, 6, u);
149 sw->a_n2 += m_n2 / sqrt (m);
152 sw->epsilon = m - 2 * pow (m_n1, 2) - 2 * pow (m_n2, 2);
153 sw->epsilon /= 1 - 2 * pow (sw->a_n1, 2) - 2 * pow (sw->a_n2, 2);
157 os->accumulate = acc;
158 stat->destroy = destroy;
164 shapiro_wilk_significance (double n, double w)
166 const double g[] = {-2.273, 0.459};
167 const double c3[] = {.544,-.39978,.025054,-6.714e-4};
168 const double c4[] = {1.3822,-.77857,.062767,-.0020322};
169 const double c5[] = {-1.5861,-.31082,-.083751,.0038915 };
170 const double c6[] = {-.4803,-.082676,.0030302};
173 double y = log (1 - w);
176 double pi6 = 6.0 / M_PI;
177 double stqr = asin(sqrt (3/ 4.0));
178 double p = pi6 * (asin (sqrt (w)) - stqr);
179 return (p < 0) ? 0 : p;
183 double gamma = polynomial (g, 2, n);
184 y = - log (gamma - y);
185 m = polynomial (c3, 4, n);
186 s = exp (polynomial (c4, 4, n));
191 m = polynomial (c5, 4, xx);
192 s = exp (polynomial (c6, 3, xx));
195 double p = gsl_cdf_gaussian_Q (y - m, s);