From e30f5e6b6bf7a3b862dacce6b17635e8bf0d909d Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 13 Aug 2011 18:05:37 +0200 Subject: [PATCH] Added an implementation for the One Sample Kolmogorov-Smirnov Test --- doc/statistics.texi | 40 +++ src/language/stats/automake.mk | 2 + src/language/stats/ks-one-sample.c | 401 +++++++++++++++++++++++++++++ src/language/stats/ks-one-sample.h | 49 ++++ src/language/stats/npar.c | 107 +++++++- src/ui/gui/automake.mk | 5 +- src/ui/gui/data-editor.ui | 7 + src/ui/gui/ks-one-sample-dialog.c | 199 ++++++++++++++ src/ui/gui/ks-one-sample-dialog.h | 24 ++ src/ui/gui/ks-one-sample.ui | 231 +++++++++++++++++ src/ui/gui/psppire-data-window.c | 2 + tests/language/stats/npar.at | 241 +++++++++++++++++ 12 files changed, 1305 insertions(+), 3 deletions(-) create mode 100644 src/language/stats/ks-one-sample.c create mode 100644 src/language/stats/ks-one-sample.h create mode 100644 src/ui/gui/ks-one-sample-dialog.c create mode 100644 src/ui/gui/ks-one-sample-dialog.h create mode 100644 src/ui/gui/ks-one-sample.ui diff --git a/doc/statistics.texi b/doc/statistics.texi index 1494b980..b1121b49 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -687,6 +687,7 @@ is used. * COCHRAN:: Cochran Q Test * FRIEDMAN:: Friedman Test * KENDALL:: Kendall's W Test +* KOLMOGOROV-SMIRNOV:: Kolmogorov Smirnov Test * KRUSKAL-WALLIS:: Kruskal-Wallis Test * MANN-WHITNEY:: Mann Whitney U Test * MCNEMAR:: McNemar Test @@ -818,6 +819,45 @@ It has the range [0,1] --- a value of zero indicates no agreement between the sa unity indicates complete agreement. 
+@node KOLMOGOROV-SMIRNOV +@subsection Kolmogorov-Smirnov Test +@vindex KOLMOGOROV-SMIRNOV +@vindex K-S +@cindex Kolmogorov-Smirnov test + +@display + [ /KOLMOGOROV-SMIRNOV (@{NORMAL [@var{mu}, @var{sigma}], UNIFORM [@var{min}, @var{max}], POISSON [@var{lambda}], EXPONENTIAL [@var{scale}] @}) = varlist ] +@end display + +The one sample Kolmogorov-Smirnov subcommand is used to test whether or not a dataset is +drawn from a particular distribution. Four distributions are supported, @i{viz:} +Normal, Uniform, Poisson and Exponential. + +Ideally you should provide the parameters of the distribution against which you wish to test +the data. For example, with the normal distribution the mean (@var{mu}) and standard deviation (@var{sigma}) +should be given; with the uniform distribution, the minimum (@var{min}) and maximum (@var{max}) values should +be provided. +However, if the parameters are omitted they will be imputed from the data. Imputing the +parameters reduces the power of the test so should be avoided if possible. + +In the following example, two variables @var{score} and @var{age} are tested to see if +they follow a normal distribution with a mean of 3.5 and a standard deviation of 2.0. +@example + NPAR TESTS + /KOLMOGOROV-SMIRNOV (normal 3.5 2.0) = @var{score} @var{age}. +@end example +If the variables need to be tested against different distributions, then a separate +subcommand must be used. For example, the following syntax tests @var{score} against +a normal distribution with mean of 3.5 and standard deviation of 2.0 whilst @var{age} +is tested against a normal distribution of mean 40 and standard deviation 1.5. +@example + NPAR TESTS + /KOLMOGOROV-SMIRNOV (normal 3.5 2.0) = @var{score} + /KOLMOGOROV-SMIRNOV (normal 40 1.5) = @var{age}. +@end example + +The abbreviated subcommand K-S may be used in place of KOLMOGOROV-SMIRNOV. 
+ @node KRUSKAL-WALLIS @subsection Kruskal-Wallis Test @vindex KRUSKAL-WALLIS diff --git a/src/language/stats/automake.mk b/src/language/stats/automake.mk index 9beacb5d..4b4510e9 100644 --- a/src/language/stats/automake.mk +++ b/src/language/stats/automake.mk @@ -30,6 +30,8 @@ language_stats_sources = \ src/language/stats/glm.c \ src/language/stats/kruskal-wallis.c \ src/language/stats/kruskal-wallis.h \ + src/language/stats/ks-one-sample.c \ + src/language/stats/ks-one-sample.h \ src/language/stats/mann-whitney.c \ src/language/stats/mann-whitney.h \ src/language/stats/mcnemar.c \ diff --git a/src/language/stats/ks-one-sample.c b/src/language/stats/ks-one-sample.c new file mode 100644 index 00000000..aba2cbde --- /dev/null +++ b/src/language/stats/ks-one-sample.c @@ -0,0 +1,401 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/
+
+#include <config.h>
+
+#include "language/stats/ks-one-sample.h"
+
+#include <gsl/gsl_cdf.h>
+#include <math.h>
+#include <stdlib.h>
+
+
+#include "math/sort.h"
+#include "data/case.h"
+#include "data/casereader.h"
+#include "data/dataset.h"
+#include "data/dictionary.h"
+#include "data/format.h"
+#include "data/value-labels.h"
+#include "data/variable.h"
+#include "language/stats/freq.h"
+#include "language/stats/npar.h"
+#include "libpspp/array.h"
+#include "libpspp/assertion.h"
+#include "libpspp/cast.h"
+#include "libpspp/compiler.h"
+#include "libpspp/hash-functions.h"
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+#include "output/tab.h"
+
+#include "gl/xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+
+/* The per test variable statistics */
+struct ks
+{
+  /* Sum of the case weights of the non-missing values. */
+  double obs_cc;
+
+  /* Bounds used by the uniform CDF: the observed minimum and maximum,
+     unless overridden by parameters given on the subcommand. */
+  double test_min ;
+  double test_max;
+
+  /* Location parameter: the mean for NORMAL; for POISSON and EXPONENTIAL
+     the single parameter of the distribution. */
+  double mu;
+
+  /* Standard deviation for NORMAL (set equal to MU for POISSON and
+     EXPONENTIAL, where it is not otherwise used in this file). */
+  double sigma;
+
+  /* Largest positive and most negative values seen of
+     (empirical CDF - theoretical CDF). */
+  double diff_pos;
+  double diff_neg;
+
+  /* Weighted sum of squares and weighted sum of the non-missing values,
+     used to impute the mean and standard deviation. */
+  double ssq;
+  double sum;
+};
+
+/* A theoretical cumulative distribution function: returns P(X <= x) for
+   the distribution whose parameters are held in KS. */
+typedef double theoretical (const struct ks *ks, double x);
+typedef theoretical *theoreticalfp;
+
+/* CDF of the uniform distribution on [test_min, test_max]. */
+static double
+theoretical_uniform (const struct ks *ks, double x)
+{
+  return gsl_cdf_flat_P (x, ks->test_min, ks->test_max);
+}
+
+/* CDF of the normal distribution with mean MU and standard deviation
+   SIGMA.  GSL's gaussian CDF is centred on zero, hence the shift. */
+static double
+theoretical_normal (const struct ks *ks, double x)
+{
+  return gsl_cdf_gaussian_P (x - ks->mu, ks->sigma);
+}
+
+/* CDF of the Poisson distribution with parameter MU. */
+static double
+theoretical_poisson (const struct ks *ks, double x)
+{
+  return gsl_cdf_poisson_P (x, ks->mu);
+}
+
+/* CDF of the exponential distribution.
+
+   NOTE(review): GSL documents the second argument of
+   gsl_cdf_exponential_P as the mean of the distribution, whereas MU is
+   imputed below as the sample mean.  Confirm that passing its
+   reciprocal is intended. */
+static double
+theoretical_exponential (const struct ks *ks, double x)
+{
+  return gsl_cdf_exponential_P (x, 1/ks->mu);
+}
+
+
+/* Theoretical CDFs, indexed by enum dist.  Designated initializers keep
+   the table correct even if the enumeration is reordered. */
+static const theoreticalfp theoreticalf[4] =
+{
+  [KS_NORMAL] = theoretical_normal,
+  [KS_UNIFORM] = theoretical_uniform,
+  [KS_POISSON] = theoretical_poisson,
+  [KS_EXPONENTIAL] = theoretical_exponential
+};
+
+/*
+   Return the asymptotic approximation to the significance of Z
+ */
+static double
+ks_asymp_sig (double z)
+{
+  if (z < 0.27)
+    return 1;
+
+  if (z >= 3.1)
+    return 0;
+
+  if (z < 1)
+    {
+      double q = exp (-1.233701 * pow (z, -2));
+      return 1 - 2.506628 * (q + pow (q, 9) + pow (q, 25))/ z ;
+    }
+  else
+    {
+      double q = exp (-2 * z * z);
+      return 2 * (q - pow (q, 4) + pow (q, 9) - pow (q, 16))/ z ;
+    }
+}
+
+static void show_results (const struct ks *, const struct ks_one_sample_test *, const struct fmt_spec *);
+
+
+/* Runs the one sample Kolmogorov-Smirnov test described by TEST on the
+   cases in INPUT and submits a table of results.
+
+   Values which are missing according to EXCLUDE are ignored, separately
+   for each variable.  Cases are weighted by the weight variable of the
+   dictionary of DS, if there is one.
+
+   INPUT is destroyed before returning. */
+void
+ks_one_sample_execute (const struct dataset *ds,
+                       struct casereader *input,
+                       enum mv_class exclude,
+                       const struct npar_test *test)
+{
+  const struct dictionary *dict = dataset_dict (ds);
+  const struct ks_one_sample_test *kst = UP_CAST (test, const struct ks_one_sample_test, parent.parent);
+  const struct one_sample_test *ost = &kst->parent;
+  struct ccase *c;
+  const struct variable *wvar = dict_get_weight (dict);
+  const struct fmt_spec *wfmt = wvar ? var_get_print_format (wvar) : & F_8_0;
+  bool warn = true;
+  int v;
+
+  struct ks *ks = xcalloc (ost->n_vars, sizeof *ks);
+
+  for (v = 0; v < ost->n_vars; ++v)
+    {
+      ks[v].obs_cc = 0;
+      ks[v].test_min = DBL_MAX;
+      ks[v].test_max = -DBL_MAX;
+      ks[v].diff_pos = -DBL_MAX;
+      ks[v].diff_neg = DBL_MAX;
+      ks[v].sum = 0;
+      ks[v].ssq = 0;
+    }
+
+  /* First pass: accumulate, for every variable, the range, the total
+     weight and the moments needed to impute omitted parameters. */
+  struct casereader *r = casereader_clone (input);
+
+  for (; (c = casereader_read (r)) != NULL; case_unref (c))
+    {
+      const double weight = dict_get_case_weight (dict, c, &warn);
+
+      for (v = 0; v < ost->n_vars; ++v)
+        {
+          const struct variable *var = ost->vars[v];
+          const union value *val = case_data (c, var);
+
+          if (var_is_value_missing (var, val, exclude))
+            continue;
+
+          minimize (&ks[v].test_min, val->f);
+          maximize (&ks[v].test_max, val->f);
+
+          /* The moments must carry the same weights as OBS_CC,
+             otherwise the mean and standard deviation imputed below
+             are wrong whenever a weight differs from 1. */
+          ks[v].obs_cc += weight;
+          ks[v].sum += weight * val->f;
+          ks[v].ssq += weight * pow2 (val->f);
+        }
+    }
+  casereader_destroy (r);
+
+  for (v = 0; v < ost->n_vars; ++v)
+    {
+      /* Use the parameters given on the subcommand; a parameter left
+         as SYSMIS is imputed from the data. */
+      switch (kst->dist)
+        {
+        case KS_UNIFORM:
+          if (kst->p[0] != SYSMIS)
+            ks[v].test_min = kst->p[0];
+
+          if (kst->p[1] != SYSMIS)
+            ks[v].test_max = kst->p[1];
+          break;
+        case KS_NORMAL:
+          if (kst->p[0] != SYSMIS)
+            ks[v].mu = kst->p[0];
+          else
+            ks[v].mu = ks[v].sum / ks[v].obs_cc;
+
+          if (kst->p[1] != SYSMIS)
+            ks[v].sigma = kst->p[1];
+          else
+            {
+              ks[v].sigma = ks[v].ssq - pow2 (ks[v].sum) / ks[v].obs_cc;
+              ks[v].sigma /= ks[v].obs_cc - 1;
+              ks[v].sigma = sqrt (ks[v].sigma);
+            }
+
+          break;
+        case KS_POISSON:
+        case KS_EXPONENTIAL:
+          if (kst->p[0] != SYSMIS)
+            ks[v].mu = ks[v].sigma = kst->p[0];
+          else
+            ks[v].mu = ks[v].sigma = ks[v].sum / ks[v].obs_cc;
+          break;
+        default:
+          NOT_REACHED ();
+        }
+
+      /* Second pass: walk the cases in ascending order of this
+         variable, comparing the empirical CDF with the theoretical one
+         both after and before the step at each value. */
+      const struct variable *var = ost->vars[v];
+      double cc = 0;
+      double prev_empirical = 0;
+      r = sort_execute_1var (casereader_clone (input), var);
+      for (; (c = casereader_read (r)) != NULL; case_unref (c))
+        {
+          double theoretical, empirical;
+          double d, dp;
+          const double weight = dict_get_case_weight (dict, c, &warn);
+          const union value *val = case_data (c, var);
+
+          if (var_is_value_missing (var, val, exclude))
+            continue;
+
+          cc += weight;
+
+          empirical = cc / ks[v].obs_cc;
+
+          theoretical = theoreticalf[kst->dist] (&ks[v], val->f);
+
+          d = empirical - theoretical;
+          dp = prev_empirical - theoretical;
+
+          if (d > 0)
+            maximize (&ks[v].diff_pos, d);
+          else
+            minimize (&ks[v].diff_neg, d);
+
+          if (dp > 0)
+            maximize (&ks[v].diff_pos, dp);
+          else
+            minimize (&ks[v].diff_neg, dp);
+
+          prev_empirical = empirical;
+        }
+
+      casereader_destroy (r);
+    }
+
+  show_results (ks, kst, wfmt);
+
+  free (ks);
+  casereader_destroy (input);
+}
+
+
+/* Submits the results table: one column per test variable, with rows
+   for N, the distribution parameters, the most extreme differences,
+   the Kolmogorov-Smirnov Z and its asymptotic significance.
+
+   KS holds one element per variable of KST.  WFMT is the format used
+   to display N. */
+static void
+show_results (const struct ks *ks,
+              const struct ks_one_sample_test *kst,
+              const struct fmt_spec *wfmt)
+{
+  int i;
+  const int row_headers = 1;
+  const int column_headers = 2;
+  const int nc = kst->parent.n_vars + column_headers;
+  const int nr = 8 + row_headers;
+  struct tab_table *table = tab_create (nc, nr);
+
+  tab_headers (table, row_headers, 0, column_headers, 0);
+
+  tab_title (table, _("One-Sample Kolmogorov-Smirnov Test"));
+
+  /* Box around the table */
+  tab_box (table, TAL_2, TAL_2, -1, -1,
+           0, 0, nc - 1, nr - 1 );
+
+  tab_hline (table, TAL_2, 0, nc - 1, row_headers);
+
+  tab_vline (table, TAL_1, column_headers, 0, nr - 1);
+
+  tab_text (table, 0, 1,
+            TAT_TITLE | TAB_LEFT , _("N"));
+
+  /* Row labels for the parameters, which depend on the distribution. */
+  switch (kst->dist)
+    {
+    case KS_NORMAL:
+      tab_text (table, 0, 2,
+                TAT_TITLE | TAB_LEFT , _("Normal Parameters"));
+
+      tab_text (table, 1, 2,
+                TAT_TITLE | TAB_LEFT , _("Mean"));
+      tab_text (table, 1, 3,
+                TAT_TITLE | TAB_LEFT , _("Std. Deviation"));
+      break;
+    case KS_UNIFORM:
+      tab_text (table, 0, 2,
+                TAT_TITLE | TAB_LEFT , _("Uniform Parameters"));
+
+      tab_text (table, 1, 2,
+                TAT_TITLE | TAB_LEFT , _("Minimum"));
+      tab_text (table, 1, 3,
+                TAT_TITLE | TAB_LEFT , _("Maximum"));
+      break;
+    case KS_POISSON:
+      tab_text (table, 0, 2,
+                TAT_TITLE | TAB_LEFT , _("Poisson Parameters"));
+
+      tab_text (table, 1, 2,
+                TAT_TITLE | TAB_LEFT , _("Lambda"));
+      break;
+    case KS_EXPONENTIAL:
+      tab_text (table, 0, 2,
+                TAT_TITLE | TAB_LEFT , _("Exponential Parameters"));
+
+      tab_text (table, 1, 2,
+                TAT_TITLE | TAB_LEFT , _("Scale"));
+      break;
+
+    default:
+      NOT_REACHED ();
+    }
+
+  /* The variable columns */
+  for (i = 0; i < kst->parent.n_vars; ++i)
+    {
+      double abs = 0;
+      double z = 0;
+      const int col = 2 + i;
+      tab_text (table, col, 0,
+                TAT_TITLE | TAB_CENTER ,
+                var_to_string (kst->parent.vars[i]));
+
+      switch (kst->dist)
+        {
+        case KS_UNIFORM:
+          tab_double (table, col, 1, 0, ks[i].obs_cc, wfmt);
+          tab_double (table, col, 2, 0, ks[i].test_min, NULL);
+          tab_double (table, col, 3, 0, ks[i].test_max, NULL);
+          break;
+
+        case KS_NORMAL:
+          tab_double (table, col, 1, 0, ks[i].obs_cc, wfmt);
+          tab_double (table, col, 2, 0, ks[i].mu, NULL);
+          tab_double (table, col, 3, 0, ks[i].sigma, NULL);
+          break;
+
+        case KS_POISSON:
+        case KS_EXPONENTIAL:
+          tab_double (table, col, 1, 0, ks[i].obs_cc, wfmt);
+          tab_double (table, col, 2, 0, ks[i].mu, NULL);
+          break;
+
+        default:
+          NOT_REACHED ();
+        }
+
+      /* The absolute difference is the larger in magnitude of the
+         positive and negative extremes. */
+      abs = ks[i].diff_pos;
+      maximize (&abs, -ks[i].diff_neg);
+
+      z = sqrt (ks[i].obs_cc) * abs;
+
+      tab_double (table, col, 5, 0, ks[i].diff_pos, NULL);
+      tab_double (table, col, 6, 0, ks[i].diff_neg, NULL);
+
+      tab_double (table, col, 4, 0, abs, NULL);
+
+      tab_double (table, col, 7, 0, z, NULL);
+      tab_double (table, col, 8, 0, ks_asymp_sig (z), NULL);
+    }
+
+
+  tab_text (table, 0, 4,
+            TAT_TITLE | TAB_LEFT , _("Most Extreme Differences"));
+
+  tab_text (table, 1, 4,
+            TAT_TITLE | TAB_LEFT , _("Absolute"));
+
+  tab_text (table, 1, 5,
+            TAT_TITLE | TAB_LEFT , _("Positive"));
+
+  tab_text (table, 1, 6,
+            TAT_TITLE | TAB_LEFT , _("Negative"));
+
+  tab_text (table, 0, 7,
+            TAT_TITLE | TAB_LEFT , _("Kolmogorov-Smirnov Z"));
+
+  tab_text (table, 0, 8,
+            TAT_TITLE | TAB_LEFT , _("Asymp. Sig. (2-tailed)"));
+
+  tab_submit (table);
+}
diff --git a/src/language/stats/ks-one-sample.h b/src/language/stats/ks-one-sample.h
new file mode 100644
index 00000000..054cce41
--- /dev/null
+++ b/src/language/stats/ks-one-sample.h
@@ -0,0 +1,49 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see .
*/ + +#if !ks_one_sample_h +#define ks_one_sample_h 1 + +#include +#include +#include "language/stats/npar.h" + +enum dist + { + KS_NORMAL, + KS_UNIFORM, + KS_POISSON, + KS_EXPONENTIAL + }; + +struct ks_one_sample_test +{ + struct one_sample_test parent; + + double p[2]; + enum dist dist; +}; + +struct casereader; +struct dataset; + + +void ks_one_sample_execute (const struct dataset *ds, + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test); + +#endif diff --git a/src/language/stats/npar.c b/src/language/stats/npar.c index ce0c3d6d..bfeaf8f7 100644 --- a/src/language/stats/npar.c +++ b/src/language/stats/npar.c @@ -34,6 +34,7 @@ #include "language/lexer/variable-parser.h" #include "language/stats/binomial.h" #include "language/stats/chisquare.h" +#include "language/stats/ks-one-sample.h" #include "language/stats/cochran.h" #include "language/stats/friedman.h" #include "language/stats/kruskal-wallis.h" @@ -83,6 +84,7 @@ struct cmd_npar_tests int chisquare; int cochran; int binomial; + int ks_one_sample; int wilcoxon; int sign; int runs; @@ -126,6 +128,7 @@ struct npar_specs /* Prototype for custom subcommands of NPAR TESTS. 
*/ static int npar_chisquare (struct lexer *, struct dataset *, struct npar_specs *); static int npar_binomial (struct lexer *, struct dataset *, struct npar_specs *); +static int npar_ks_one_sample (struct lexer *, struct dataset *, struct npar_specs *); static int npar_runs (struct lexer *, struct dataset *, struct npar_specs *); static int npar_friedman (struct lexer *, struct dataset *, struct npar_specs *); static int npar_kendall (struct lexer *, struct dataset *, struct npar_specs *); @@ -147,6 +150,7 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests { npt->binomial = 0; npt->chisquare = 0; + npt->ks_one_sample = 0; npt->cochran = 0; npt->friedman = 0; npt->kruskal_wallis = 0; @@ -262,6 +266,24 @@ parse_npar_tests (struct lexer *lexer, struct dataset *ds, struct cmd_npar_tests NOT_REACHED (); } } + else if (lex_match_phrase (lexer, "K-S") || + lex_match_phrase (lexer, "KOLMOGOROV-SMIRNOV")) + { + lex_match (lexer, T_EQUALS); + npt->ks_one_sample++; + switch (npar_ks_one_sample (lexer, ds, nps)) + { + case 0: + goto lossage; + case 1: + break; + case 2: + lex_error (lexer, NULL); + goto lossage; + default: + NOT_REACHED (); + } + } else if (lex_match_phrase (lexer, "K-W") || lex_match_phrase (lexer, "KRUSKAL-WALLIS")) { @@ -909,12 +931,93 @@ npar_binomial (struct lexer *lexer, struct dataset *ds, } + +static void +ks_one_sample_parse_params (struct lexer *lexer, struct ks_one_sample_test *kst, int params) +{ + assert (params == 1 || params == 2); + + if (lex_is_number (lexer)) + { + kst->p[0] = lex_number (lexer); + + lex_get (lexer); + if ( params == 2) + { + lex_match (lexer, T_COMMA); + if (lex_force_num (lexer)) + { + kst->p[1] = lex_number (lexer); + lex_get (lexer); + } + } + } +} + +static int +npar_ks_one_sample (struct lexer *lexer, struct dataset *ds, struct npar_specs *specs) +{ + struct ks_one_sample_test *kst = pool_alloc (specs->pool, sizeof (*kst)); + struct one_sample_test *tp = &kst->parent; + struct 
npar_test *nt = &tp->parent; + + nt->execute = ks_one_sample_execute; + nt->insert_variables = one_sample_insert_variables; + + kst->p[0] = kst->p[1] = SYSMIS; + + if (! lex_force_match (lexer, T_LPAREN)) + return 2; + + if (lex_match_id (lexer, "NORMAL")) + { + kst->dist = KS_NORMAL; + ks_one_sample_parse_params (lexer, kst, 2); + } + else if (lex_match_id (lexer, "POISSON")) + { + kst->dist = KS_POISSON; + ks_one_sample_parse_params (lexer, kst, 1); + } + else if (lex_match_id (lexer, "UNIFORM")) + { + kst->dist = KS_UNIFORM; + ks_one_sample_parse_params (lexer, kst, 2); + } + else if (lex_match_id (lexer, "EXPONENTIAL")) + { + kst->dist = KS_EXPONENTIAL; + ks_one_sample_parse_params (lexer, kst, 1); + } + else + return 2; + + if (! lex_force_match (lexer, T_RPAREN)) + return 2; + + lex_match (lexer, T_EQUALS); + + if (! parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds), + &tp->vars, &tp->n_vars, + PV_NUMERIC | PV_NO_SCRATCH | PV_NO_DUPLICATE) ) + return 2; + + specs->n_tests++; + specs->test = pool_realloc (specs->pool, + specs->test, + sizeof (*specs->test) * specs->n_tests); + + specs->test[specs->n_tests - 1] = kst; + + return 1; +} + + static bool parse_two_sample_related_test (struct lexer *lexer, const struct dictionary *dict, struct two_sample_test *test_parameters, - struct pool *pool - ) + struct pool *pool) { int n = 0; bool paired = false; diff --git a/src/ui/gui/automake.mk b/src/ui/gui/automake.mk index c815be99..e4ef91eb 100644 --- a/src/ui/gui/automake.mk +++ b/src/ui/gui/automake.mk @@ -20,6 +20,7 @@ UI_FILES = \ src/ui/gui/frequencies.ui \ src/ui/gui/k-means.ui \ src/ui/gui/k-related.ui \ + src/ui/gui/ks-one-sample.ui \ src/ui/gui/oneway.ui \ src/ui/gui/paired-samples.ui \ src/ui/gui/psppire.ui \ @@ -83,7 +84,7 @@ context = apps install-icons: for size in 16x16 ; do \ $(MKDIR_P) $(themedir)/$$size/$(context) ; \ - $(INSTALL) $(top_srcdir)/src/ui/gui/psppicon.png $(themedir)/$$size/$(context) ; \ + $(INSTALL) 
$(top_srcdir)/src/ui/gui/icons/$$size/* $(themedir)/$$size/$(context) ; \ done gtk-update-icon-cache --ignore-theme-index $(themedir) @@ -176,6 +177,8 @@ src_ui_gui_psppire_SOURCES = \ src/ui/gui/k-related-dialog.h \ src/ui/gui/k-means-dialog.c \ src/ui/gui/k-means-dialog.h \ + src/ui/gui/ks-one-sample-dialog.c \ + src/ui/gui/ks-one-sample-dialog.h \ src/ui/gui/main.c \ src/ui/gui/missing-val-dialog.c \ src/ui/gui/missing-val-dialog.h \ diff --git a/src/ui/gui/data-editor.ui b/src/ui/gui/data-editor.ui index c7faa8a9..985d6708 100644 --- a/src/ui/gui/data-editor.ui +++ b/src/ui/gui/data-editor.ui @@ -451,6 +451,12 @@ R_uns... + + + ks-one-sample + 1-Sample _K-S... + + "two-related-samples"> @@ -598,6 +604,7 @@ + diff --git a/src/ui/gui/ks-one-sample-dialog.c b/src/ui/gui/ks-one-sample-dialog.c new file mode 100644 index 00000000..c9a1bcbd --- /dev/null +++ b/src/ui/gui/ks-one-sample-dialog.c @@ -0,0 +1,199 @@ +/* PSPPIRE - a graphical user interface for PSPP. + Copyright (C) 2011 Free Software Foundation + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
*/
+
+#include
+
+#include
+#include
+
+#include "ks-one-sample-dialog.h"
+#include "psppire-selector.h"
+#include "psppire-dictview.h"
+#include "psppire-dialog.h"
+
+#include "psppire-data-window.h"
+#include "psppire-var-view.h"
+
+#include "executor.h"
+#include "helper.h"
+#include "dialog-common.h"
+
+#include
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+
+/* Indices into the CB array of struct ks_one_sample, one per
+   distribution check button. */
+enum
+  {
+    CB_NORMAL,
+    CB_POISSON,
+    CB_UNIFORM,
+    CB_EXPONENTIAL
+  };
+
+/* State of the dialog, shared between its callbacks. */
+struct ks_one_sample
+{
+  GtkBuilder *xml;          /* Builder loaded from ks-one-sample.ui. */
+  PsppireDict *dict;        /* Dictionary shown in the source list. */
+
+  GtkWidget *variables;     /* View holding the selected test variables. */
+  PsppireDataWindow *de ;   /* Data window the dialog was opened from. */
+
+  GtkWidget *cb[4];         /* Distribution check buttons, indexed by CB_*. */
+};
+
+static char * generate_syntax (const struct ks_one_sample *rd);
+
+
+/* "refresh" handler: empties the list of selected variables and
+   unchecks every distribution button. */
+static void
+refresh (struct ks_one_sample *fd)
+{
+  int i;
+  GtkTreeModel *liststore =
+    gtk_tree_view_get_model (GTK_TREE_VIEW (fd->variables));
+  gtk_list_store_clear (GTK_LIST_STORE (liststore));
+
+  for (i = 0; i < 4; ++i)
+    gtk_toggle_button_set_active (GTK_TOGGLE_BUTTON (fd->cb[i]), FALSE);
+}
+
+
+/* Validity predicate for the dialog.  DATA is the struct ks_one_sample.
+   Returns TRUE only if at least one variable has been selected and at
+   least one distribution button is active. */
+static gboolean
+dialog_state_valid (gpointer data)
+{
+  int i;
+  struct ks_one_sample *fd = data;
+
+  GtkTreeModel *liststore = gtk_tree_view_get_model (GTK_TREE_VIEW (fd->variables));
+
+  if (gtk_tree_model_iter_n_children (liststore, NULL) < 1)
+    return FALSE;
+
+  /* Stop at the first active button; I reaches 4 only if none is. */
+  for (i = 0; i < 4; ++i)
+    {
+      if ( TRUE == gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (fd->cb[i])))
+        break;
+    }
+  if ( i >= 4)
+    return FALSE;
+
+
+  return TRUE;
+}
+
+
+/* Pops up the Ks_One_Sample dialog box */
+void
+ks_one_sample_dialog (PsppireDataWindow *dw)
+{
+  struct ks_one_sample fd;
+  gint response;
+
+  PsppireVarStore *vs;
+
+  GtkWidget *dialog ;
+  GtkWidget *source ;
+
+  fd.xml = builder_new ("ks-one-sample.ui");
+
+  dialog = get_widget_assert (fd.xml, "ks-one-sample-dialog");
+  source = get_widget_assert (fd.xml, "dict-view");
+
+  fd.cb[CB_NORMAL] = get_widget_assert (fd.xml, "checkbutton-normal");
+  fd.cb[CB_POISSON] = get_widget_assert (fd.xml, "checkbutton-poisson");
+  fd.cb[CB_UNIFORM] = get_widget_assert (fd.xml, "checkbutton-uniform");
+  fd.cb[CB_EXPONENTIAL] = get_widget_assert (fd.xml, "checkbutton-exp");
+
+  fd.de = dw;
+
+  g_signal_connect_swapped (dialog, "refresh", G_CALLBACK (refresh), &fd);
+
+
+  fd.variables = get_widget_assert (fd.xml, "psppire-var-view1");
+
+  g_object_get (fd.de->data_editor, "var-store", &vs, NULL);
+
+  gtk_window_set_transient_for (GTK_WINDOW (dialog), GTK_WINDOW (fd.de));
+
+  /* Offer only numeric variables in the source list. */
+  g_object_get (vs, "dictionary", &fd.dict, NULL);
+  g_object_set (source, "model", fd.dict,
+                "predicate", var_is_numeric,
+                NULL);
+
+  psppire_dialog_set_valid_predicate (PSPPIRE_DIALOG (dialog),
+                                      dialog_state_valid, &fd);
+
+  response = psppire_dialog_run (PSPPIRE_DIALOG (dialog));
+
+  /* Either run the generated syntax or paste it; any other response
+     discards the dialog's state. */
+  switch (response)
+    {
+    case GTK_RESPONSE_OK:
+      g_free (execute_syntax_string (dw, generate_syntax (&fd)));
+      break;
+    case PSPPIRE_RESPONSE_PASTE:
+      g_free (paste_syntax_to_window (generate_syntax (&fd)));
+      break;
+    default:
+      break;
+    }
+
+  g_object_unref (fd.xml);
+}
+
+
+
+/* Appends to STRING one /KOLMOGOROV-SMIRNOV subcommand testing the
+   variables in VV against the distribution named DIST.  No parameters
+   are written for the distribution. */
+static void
+append_fragment (GString *string, const gchar *dist, PsppireVarView *vv)
+{
+  g_string_append (string, "\n\t/KOLMOGOROV-SMIRNOV");
+
+  g_string_append (string, " ( ");
+  g_string_append (string, dist);
+  g_string_append (string, " ) = ");
+
+  psppire_var_view_append_names (vv, 0, string);
+}
+
+
+/* Builds the NPAR TEST syntax for the current state of the dialog: one
+   subcommand for each active distribution button, all naming the same
+   variables.  Returns the character data of the string that was
+   built. */
+char *
+generate_syntax (const struct ks_one_sample *rd)
+{
+  gchar *text;
+
+  GString *string = g_string_new ("NPAR TEST");
+
+  if ( gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (rd->cb[CB_NORMAL])))
+    append_fragment (string, "NORMAL", PSPPIRE_VAR_VIEW (rd->variables));
+
+  if ( gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (rd->cb[CB_UNIFORM])))
+    append_fragment (string, "UNIFORM", PSPPIRE_VAR_VIEW (rd->variables));
+
+  if ( gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (rd->cb[CB_POISSON])))
+    append_fragment (string, "POISSON", PSPPIRE_VAR_VIEW (rd->variables));
+
+  if ( gtk_toggle_button_get_active (GTK_TOGGLE_BUTTON (rd->cb[CB_EXPONENTIAL])))
+    append_fragment (string, "EXPONENTIAL", PSPPIRE_VAR_VIEW (rd->variables));
+
+  g_string_append (string, ".\n");
+
+  /* Keep the character data; free only the GString wrapper. */
+  text = string->str;
+
+  g_string_free (string, FALSE);
+
+  return text;
+}
diff --git a/src/ui/gui/ks-one-sample-dialog.h b/src/ui/gui/ks-one-sample-dialog.h
new file mode 100644
index 00000000..59f45895
--- /dev/null
+++ b/src/ui/gui/ks-one-sample-dialog.h
@@ -0,0 +1,24 @@
+/* PSPPIRE - a graphical user interface for PSPP.
+   Copyright (C) 2011 Free Software Foundation
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see .
*/ + +#ifndef __KS_ONE_SAMPLE_DIALOG_H +#define __KS_ONE_SAMPLE_DIALOG_H + +#include "psppire-data-window.h" + +void ks_one_sample_dialog (PsppireDataWindow * data); + +#endif diff --git a/src/ui/gui/ks-one-sample.ui b/src/ui/gui/ks-one-sample.ui new file mode 100644 index 00000000..9386e409 --- /dev/null +++ b/src/ui/gui/ks-one-sample.ui @@ -0,0 +1,231 @@ + + + + + + + One-Sample Kolmogorov-Smirnov Test + True + + + True + 2 + + + True + 5 + 5 + 5 + + + True + vertical + 5 + + + True + + + True + True + never + automatic + in + + + True + True + 5 + False + False + + + + + 0 + + + + + True + vertical + True + + + True + True + True + 5 + dict-view + psppire-var-view1 + + + False + False + 0 + + + + + False + False + 5 + 1 + + + + + True + 0 + none + + + True + 12 + + + True + True + never + automatic + in + + + True + True + 5 + False + False + + + + + + + + + True + Test _Variable List: + True + True + psppire-var-view1 + + + + + 2 + + + + + 0 + + + + + 0 + True + + + True + 12 + + + True + 2 + 2 + + + _Normal + True + True + False + none + True + True + + + + + _Poisson + True + True + False + True + True + + + 1 + 2 + + + + + _Uniform + True + True + False + True + True + + + 1 + 2 + + + + + _Exponential + True + True + False + True + True + + + 1 + 2 + 1 + 2 + + + + + + + + + True + Test Distribution + True + + + + + False + 1 + + + + + + + 0 + + + + + True + 5 + + + False + False + end + 1 + + + + + + diff --git a/src/ui/gui/psppire-data-window.c b/src/ui/gui/psppire-data-window.c index fe9f8df6..494f924b 100644 --- a/src/ui/gui/psppire-data-window.c +++ b/src/ui/gui/psppire-data-window.c @@ -53,6 +53,7 @@ #include "ui/gui/psppire.h" #include "ui/gui/rank-dialog.h" #include "ui/gui/runs-dialog.h" +#include "ui/gui/ks-one-sample-dialog.h" #include "ui/gui/recode-dialog.h" #include "ui/gui/regression-dialog.h" #include "ui/gui/reliability-dialog.h" @@ -1125,6 +1126,7 @@ psppire_data_window_finish_init (PsppireDataWindow *de, connect_action (de, "chi-square", 
G_CALLBACK (chisquare_dialog)); connect_action (de, "binomial", G_CALLBACK (binomial_dialog)); connect_action (de, "runs", G_CALLBACK (runs_dialog)); + connect_action (de, "ks-one-sample", G_CALLBACK (ks_one_sample_dialog)); connect_action (de, "k-related-samples", G_CALLBACK (k_related_dialog)); connect_action (de, "two-related-samples", G_CALLBACK (two_related_dialog)); diff --git a/tests/language/stats/npar.at b/tests/language/stats/npar.at index 28e8e497..cc1bd167 100644 --- a/tests/language/stats/npar.at +++ b/tests/language/stats/npar.at @@ -1148,3 +1148,244 @@ v1 & junk,20,.453,.227,.164 ]) AT_CLEANUP + + +AT_SETUP([NPAR TESTS Kolmogorov-Smirnov Uniform parameters given]) + +AT_DATA([ks-uniform.sps], [dnl +set format F12.3. +data list notable list /x *. +begin data +.554 +.382 +.329 +.480 +.711 +.503 +.203 +.477 +.621 +.581 +end data. + +npar tests k-s (uniform 0 1) = x. +]) + +AT_CHECK([pspp -O format=csv ks-uniform.sps], [0], [dnl +Table: One-Sample Kolmogorov-Smirnov Test +,,x +N,,10 +Uniform Parameters,Minimum,.000 +,Maximum,1.000 +Most Extreme Differences,Absolute,.289 +,Positive,.289 +,Negative,-.229 +Kolmogorov-Smirnov Z,,.914 +Asymp. Sig. (2-tailed),,.374 +]) + +AT_CLEANUP + + +AT_SETUP([NPAR TESTS Kolmogorov-Smirnov Normal parameters imputed]) + +AT_DATA([ks-normal.sps], [dnl +set format = F12.3. + +data list notable list /foo * bar *. +begin data. 
+65 12.5 +59 14.2 +43 12.6 +57 +68 +79 +51 +62 +57 +73 +58 +58 +68 +75 +47 +70 +59 +71 +52 +48 13.0 +58 14.1 +37 15.0 +39 13.1 +58 13.2 +43 14.5 +58 13.5 +86 14.0 +63 12.5 +80 12.8 +70 +63 +53 +53 +48 +49 +51 +47 +81 +66 +78 +65 +69 +70 12.1 +63 12.5 +64 12.4 +39 13.8 +51 13.2 +68 14.0 +76 12.6 +53 12.1 +71 13.5 +47 13.8 +87 14.1 +72 12.9 +48 12.1 +75 12.8 +51 13.4 +63 13.9 +61 12.5 +61 12.4 +66 12.8 +82 12.9 +81 13.6 +46 +52 +71 +73 +58 +57 +46 +58 +52 13.5 +71 13.2 +57 12.8 +78 14.1 +73 12.1 +50 12.6 +71 +51 +51 +68 +84 +64 +66 +65 +52 +56 +70 +68 +66 +78 +65 +71 +53 +81 +53 +57 +64 +61 +43 +56 +37 +74 +66 +81 +67 +80 +68 +76 +70 +80 +42 +74 +80 +70 +60 +39 +72 +69 +63 +72 +63 +49 +53 13.2 +43 13.8 +51 12.5 +63 12.6 +64 12.9 +65 13.0 +64 12.5 +66 12.0 +55 +62 +58 +48 +67 +46 +36 +61 +55 +77 +74 +60 +70 +69 +57 +49 +63 +69 +63 +76 +53 +54 +42 +64 +66 +61 +62 +73 +73 +60 +79 +40 +48 +76 +60 +76 +54 +69 +65 +69 +51 +54 +82 +end data. + +npar tests + /k-s (normal) = foo bar. +]) + +AT_CHECK([pspp -O format=csv ks-normal.sps], [0], [dnl +Table: One-Sample Kolmogorov-Smirnov Test +,,foo,bar +N,,174,48 +Normal Parameters,Mean,62.109,13.108 +,Std. Deviation,11.548,.718 +Most Extreme Differences,Absolute,.059,.115 +,Positive,.055,.115 +,Negative,-.059,-.082 +Kolmogorov-Smirnov Z,,.785,.795 +Asymp. Sig. (2-tailed),,.569,.552 +]) + + +AT_CLEANUP -- 2.30.2