msgstr ""
"Project-Id-Version: PSPP 0.7.6\n"
"Report-Msgid-Bugs-To: pspp-dev@gnu.org\n"
-"POT-Creation-Date: 2010-10-17 22:08+0200\n"
+"POT-Creation-Date: 2010-10-21 19:40+0200\n"
"PO-Revision-Date: 2010-10-17 16:51+0200\n"
"Last-Translator: John Darrington <john@darrington.wattle.id.au>\n"
"Language-Team: John Darrington <john@darrington.wattle.id.au>\n"
"Unrecognised record type 7, subtype %d. Please send a copy of this file, "
"and the syntax which created it to %s"
-
#: src/data/sys-file-reader.c:919 tests/dissect-sysfile.c:595
#, c-format
msgid "Bad size (%zu) or count (%zu) field on record type 7, subtype 3."
msgstr "Syntax error at ‘%s’"
#: src/language/lexer/lexer.c:457 src/language/xforms/select-if.c:60
-#: src/language/stats/autorecode.c:161 src/language/stats/npar.c:310
+#: src/language/stats/autorecode.c:161 src/language/stats/npar.c:332
#: src/language/data-io/print-space.c:73
msgid "expecting end of command"
msgstr ""
msgid "String expected following `+'."
msgstr "String expected following ‘+’."
-#: src/language/lexer/format-parser.c:88
+#: src/language/lexer/format-parser.c:79
msgid "expecting valid format specifier"
msgstr ""
-#: src/language/lexer/format-parser.c:107
-#: src/language/lexer/format-parser.c:126
+#: src/language/lexer/format-parser.c:118
+#: src/language/lexer/format-parser.c:138
#: src/language/data-io/placement-parser.c:226
#, c-format
msgid "Unknown format type `%s'."
msgstr "Unknown format type ‘%s’."
-#: src/language/lexer/format-parser.c:121
+#: src/language/lexer/format-parser.c:133
msgid "expecting format type"
msgstr ""
msgid "Mean Rank"
msgstr ""
-#: src/language/stats/npar.c:233 src/language/stats/npar.c:260
+#: src/language/stats/npar.c:255 src/language/stats/npar.c:282
#, c-format
msgid "The %s subcommand may be given only once."
msgstr ""
-#: src/language/stats/npar.c:343
+#: src/language/stats/npar.c:365
msgid "NPAR subcommand not currently implemented."
msgstr ""
-#: src/language/stats/npar.c:496
+#: src/language/stats/npar.c:519
+msgid "Expecting MEAN, MEDIAN, MODE or number"
+msgstr ""
+
+#: src/language/stats/npar.c:577
#, c-format
msgid ""
"The specified value of HI (%d) is lower than the specified value of LO (%d)"
msgstr ""
-#: src/language/stats/npar.c:551
+#: src/language/stats/npar.c:632
#, c-format
msgid ""
"%d expected values were given, but the specified range (%d-%d) requires "
"exactly %d values."
msgstr ""
-#: src/language/stats/npar.c:690 src/language/stats/t-test.q:380
+#: src/language/stats/npar.c:771 src/language/stats/t-test.q:380
#, c-format
msgid ""
"PAIRED was specified but the number of variables preceding WITH (%zu) did "
msgid "1 - Specificity"
msgstr ""
+#: src/language/stats/runs.c:166
+#, c-format
+msgid ""
+"Multiple modes exist for varible `%s'. Using %g as the threshold value."
+msgstr ""
+"Multiple modes exist for variable ‘%s’. Using %g as the threshold value."
+
+#: src/language/stats/runs.c:315
+msgid "Runs Test"
+msgstr ""
+
+#: src/language/stats/runs.c:360
+#, fuzzy
+msgid "Test Value "
+msgstr "Cases ≥ Test Value"
+
+#: src/language/stats/runs.c:364
+msgid "Test Value (mode)"
+msgstr ""
+
+#: src/language/stats/runs.c:368
+msgid "Test Value (mean)"
+msgstr ""
+
+#: src/language/stats/runs.c:372
+msgid "Test Value (median)"
+msgstr ""
+
+#: src/language/stats/runs.c:377
+msgid "Cases < Test Value"
+msgstr ""
+
+#: src/language/stats/runs.c:380
+msgid "Cases >= Test Value"
+msgstr "Cases ≥ Test Value"
+
+#: src/language/stats/runs.c:383
+msgid "Total Cases"
+msgstr ""
+
+#: src/language/stats/runs.c:386
+msgid "Number of Runs"
+msgstr ""
+
+#: src/language/stats/runs.c:389 src/language/stats/wilcoxon.c:317
+msgid "Z"
+msgstr ""
+
+#: src/language/stats/runs.c:392 src/language/stats/wilcoxon.c:318
+#: src/language/stats/crosstabs.q:1209
+msgid "Asymp. Sig. (2-tailed)"
+msgstr ""
+
#: src/language/stats/sign.c:90
msgid "Negative Differences"
msgstr ""
msgid "Positive Ranks"
msgstr ""
-#: src/language/stats/wilcoxon.c:317
-msgid "Z"
-msgstr ""
-
-#: src/language/stats/wilcoxon.c:318 src/language/stats/crosstabs.q:1209
-msgid "Asymp. Sig. (2-tailed)"
-msgstr ""
-
#: src/language/data-io/combine-files.c:210
msgid "Cannot specify the active file since no active file has been defined."
msgstr ""
#include <language/lexer/value-parser.h>
#include <language/stats/binomial.h>
#include <language/stats/chisquare.h>
+#include <language/stats/runs.h>
#include <language/stats/kruskal-wallis.h>
#include <language/stats/wilcoxon.h>
#include <language/stats/sign.h>
int binomial;
int wilcoxon;
int sign;
+ int runs;
int kruskal_wallis;
int missing;
int method;
/* Prototype for custom subcommands of NPAR TESTS. */
static int npar_chisquare (struct lexer *, struct dataset *, struct npar_specs *);
static int npar_binomial (struct lexer *, struct dataset *, struct npar_specs *);
+static int npar_runs (struct lexer *lexer, struct dataset *, struct npar_specs *);
+
static int npar_wilcoxon (struct lexer *, struct dataset *, struct npar_specs *);
static int npar_sign (struct lexer *, struct dataset *, struct npar_specs *);
static int npar_kruskal_wallis (struct lexer *, struct dataset *, struct npar_specs *);
npt->chisquare = 0;
npt->binomial = 0;
npt->wilcoxon = 0;
+ npt->runs = 0;
npt->sign = 0;
npt->missing = 0;
npt->miss = MISS_ANALYSIS;
memset (npt->a_statistics, 0, sizeof npt->a_statistics);
for (;;)
{
- if (lex_match_hyphenated_word (lexer, "CHISQUARE"))
+ if (lex_match_hyphenated_word (lexer, "RUNS"))
+ {
+ npt->runs++;
+ switch (npar_runs (lexer, ds, nps))
+ {
+ case 0:
+ goto lossage;
+ case 1:
+ break;
+ case 2:
+ lex_error (lexer, NULL);
+ goto lossage;
+ default:
+ NOT_REACHED ();
+ }
+
+ }
+ else if (lex_match_hyphenated_word (lexer, "CHISQUARE"))
{
lex_match (lexer, '=');
npt->chisquare++;
return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
}
+/* Parses the RUNS subcommand of NPAR TESTS, whose syntax is
+
+     RUNS (MEAN | MEDIAN | MODE | number) = variable list
+
+   and, on success, appends the new test to SPECS->test.
+
+   Return value, as interpreted by the caller:
+     1  the subcommand was parsed and the test was registered;
+     0  parsing failed and the error has already been reported here;
+     2  parsing failed; the caller reports a generic syntax error.
+
+   Nothing is registered in SPECS unless the whole subcommand parsed,
+   so a half-initialised test can never be executed. */
+static int
+npar_runs (struct lexer *lexer, struct dataset *ds,
+           struct npar_specs *specs)
+{
+  /* Allocated from the specs pool, so no explicit cleanup is needed on
+     the failure paths below. */
+  struct runs_test *rt = pool_alloc (specs->pool, sizeof (*rt));
+  struct one_sample_test *tp = &rt->parent;
+  struct npar_test *nt = &tp->parent;
+
+  nt->execute = runs_execute;
+  nt->insert_variables = one_sample_insert_variables;
+
+  /* The cut point specification is mandatory.  Without it RT would be
+     left with indeterminate cp_mode, vars and n_vars. */
+  if (!lex_force_match (lexer, '('))
+    return 2;
+
+  if (lex_match_id (lexer, "MEAN"))
+    rt->cp_mode = CP_MEAN;
+  else if (lex_match_id (lexer, "MEDIAN"))
+    rt->cp_mode = CP_MEDIAN;
+  else if (lex_match_id (lexer, "MODE"))
+    rt->cp_mode = CP_MODE;
+  else if (lex_is_number (lexer))
+    {
+      rt->cutpoint = lex_number (lexer);
+      rt->cp_mode = CP_CUSTOM;
+      lex_get (lexer);
+    }
+  else
+    {
+      lex_error (lexer, _("Expecting MEAN, MEDIAN, MODE or number"));
+      return 0;
+    }
+
+  /* Do not go on to parse a variable list if the punctuation that must
+     precede it is absent. */
+  if (!lex_force_match (lexer, ')'))
+    return 2;
+
+  if (!lex_force_match (lexer, '='))
+    return 2;
+
+  if (!parse_variables_const_pool (lexer, specs->pool, dataset_dict (ds),
+                                   &tp->vars, &tp->n_vars,
+                                   PV_NO_SCRATCH | PV_NO_DUPLICATE | PV_NUMERIC))
+    return 2;
+
+  /* Everything parsed: grow the test array by one and register NT. */
+  specs->n_tests++;
+  specs->test = pool_realloc (specs->pool,
+                              specs->test,
+                              sizeof (*specs->test) * specs->n_tests);
+
+  specs->test[specs->n_tests - 1] = nt;
+
+  return 1;
+}
+
+
static int
npar_chisquare (struct lexer *lexer, struct dataset *ds,
struct npar_specs *specs)
struct one_sample_test *tp = &cstp->parent;
struct npar_test *nt = &tp->parent;
+
nt->execute = chisquare_execute;
nt->insert_variables = one_sample_insert_variables;
--- /dev/null
+/* PSPP - a program for statistical analysis. -*-c-*-
+ Copyright (C) 2010 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <config.h>
+
+#include "runs.h"
+
+#include <gsl/gsl_cdf.h>
+#include <math.h>
+
+#include <data/format.h>
+
+#include <libpspp/misc.h>
+#include <libpspp/message.h>
+#include <data/procedure.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
+#include <data/casegrouper.h>
+#include <data/dictionary.h>
+#include <data/subcase.h>
+#include <data/variable.h>
+#include <math/percentiles.h>
+#include <math/sort.h>
+
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+
+/* Per-variable working state for the runs test.  runs_execute keeps
+   one of these for each test variable, in a zero-initialised array. */
+struct run_state
+{
+ /* The value used to dichotomise the data: each case is classified
+    as either below this value or not below it. */
+ double cutpoint;
+
+ /* The sum of the weights of the cases whose value is not less
+    than cutpoint */
+ double np;
+
+ /* The sum of the weights of the cases whose value is less than
+    cutpoint */
+ double nn;
+
+ /* The sum of np and nn.  (While the mean is being computed this
+    temporarily holds the running sum of weights.) */
+ double n;
+
+ /* The number of runs, i.e. the number of times the classification
+    of a case differed from that of the previous case */
+ long runs;
+
+ /* The sign of the last case seen: +1 if it was not below cutpoint,
+    -1 if it was, and 0 if no case has been seen yet */
+ short last_sign;
+};
+
+
+
+/* Returns the Z statistic for the run counts held in RS, based on the
+   asymptotic (normal) distribution of the number of runs.
+
+   When fewer than 50 cases were counted, a continuity correction of
+   0.5 is applied to the deviation from the expected number of runs;
+   a deviation smaller than 0.5 in magnitude gives a result of 0. */
+static double
+runs_statistic (const struct run_state *rs)
+{
+  const double total = rs->np + rs->nn;
+  const double twice_product = 2 * rs->np * rs->nn;
+  const double expected_runs = twice_product / total + 1.0;
+  double deviation = rs->runs - expected_runs;
+  double variance;
+
+  if (rs->n < 50)
+    {
+      /* Written as two negated tests so that a deviation which is
+         neither <= -0.5 nor >= 0.5 (including NaN) returns 0. */
+      if (!(deviation <= -0.5) && !(deviation >= 0.5))
+        return 0;
+
+      /* Shrink the deviation towards zero by one half. */
+      deviation += (deviation <= -0.5) ? 0.5 : -0.5;
+    }
+
+  variance = twice_product * (twice_product - rs->nn - rs->np);
+  variance /= pow2 (total);
+  variance /= total - 1.0;
+
+  return deviation / sqrt (variance);
+}
+
+static void show_runs_result (const struct runs_test *, const struct run_state *, const struct dictionary *);
+
+/* Performs the runs test described by TEST on the cases in INPUT and
+   submits the resulting table.
+
+   DS supplies the dictionary and, through it, the weight variable if
+   there is one.  INPUT is read to the end and destroyed.  Values of a
+   test variable that are missing according to EXCLUDE are ignored for
+   that variable.  EXACT and TIMER are unused.
+
+   The work is done in two phases.  First a cut point is established
+   for every test variable according to the test's cp_mode: the mode,
+   the median, the mean, or a number given by the user.  Then INPUT is
+   read in order, each value is classified as below or not below its
+   cut point, and a new run is counted whenever the classification
+   differs from that of the previous case. */
+void
+runs_execute (const struct dataset *ds,
+              struct casereader *input,
+              enum mv_class exclude,
+              const struct npar_test *test,
+              bool exact UNUSED,
+              double timer UNUSED)
+{
+  int v;
+  struct ccase *c;
+  const struct dictionary *dict = dataset_dict (ds);
+  const struct variable *weight = dict_get_weight (dict);
+
+  struct one_sample_test *otp = UP_CAST (test, struct one_sample_test, parent);
+  struct runs_test *rt = UP_CAST (otp, struct runs_test, parent);
+
+  /* Zero-initialised, so all counts start at 0 and last_sign starts
+     at 0, which differs from both +1 and -1. */
+  struct run_state *rs = xcalloc (otp->n_vars, sizeof (*rs));
+
+  /* Phase 1: establish the cut point of every variable. */
+  switch (rt->cp_mode)
+    {
+    case CP_MODE:
+      {
+        for (v = 0; v < otp->n_vars; ++v)
+          {
+            bool multimodal = false;
+            struct run_state *run = &rs[v];
+            /* The largest weighted count of any single value seen so
+               far.  Starts at SYSMIS, as in the original code;
+               presumably that compares below every real count, so the
+               first group always becomes the initial candidate. */
+            double max_cc = SYSMIS;
+            struct casereader *group = NULL;
+            struct casegrouper *grouper;
+            struct casereader *reader = casereader_clone (input);
+            const struct variable *var = otp->vars[v];
+
+            /* Sort by VAR so that equal values are adjacent, then
+               walk the data one group of equal values at a time. */
+            reader = sort_execute_1var (reader, var);
+
+            grouper = casegrouper_create_vars (reader, &var, 1);
+            while (casegrouper_get_next_group (grouper, &group))
+              {
+                double x = SYSMIS;
+                double cc = 0.0;
+
+                for (; (c = casereader_read (group)); case_unref (c))
+                  {
+                    const double w = weight ? case_data (c, weight)->f: 1.0;
+                    const union value *val = case_data (c, var);
+                    if (var_is_value_missing (var, val, exclude))
+                      continue;
+                    x = val->f;
+                    cc += w;
+                  }
+
+                if (cc > max_cc)
+                  {
+                    /* Strictly more frequent than anything seen
+                       before: this value is, for now, the only mode. */
+                    run->cutpoint = x;
+                    max_cc = cc;
+                    multimodal = false;
+                  }
+                else if (cc == max_cc)
+                  {
+                    /* Ties with the current mode: keep the larger of
+                       the tied values. */
+                    multimodal = true;
+                    if (x > run->cutpoint)
+                      run->cutpoint = x;
+                  }
+                casereader_destroy (group);
+              }
+            casegrouper_destroy (grouper);
+            if (multimodal)
+              msg (MW, _("Multiple modes exist for varible `%s'. Using %g as the threshold value."),
+                   var_get_name (var), run->cutpoint);
+          }
+      }
+      break;
+    case CP_MEDIAN:
+      {
+        for (v = 0; v < otp->n_vars; ++v)
+          {
+            double cc = 0.0;
+            struct run_state *run = &rs[v];
+            struct casereader *reader = casereader_clone (input);
+            const struct variable *var = otp->vars[v];
+            struct casewriter *writer;
+            struct percentile *median;
+            struct order_stats *os;
+            struct subcase sc;
+            subcase_init_var (&sc, var, SC_ASCEND);
+            writer = sort_create_writer (&sc, casereader_get_proto (reader));
+
+            /* Copy the non-missing cases into the sorting writer and
+               total their weights. */
+            for (; (c = casereader_read (reader)); case_unref (c))
+              {
+                const union value *val = case_data (c, var);
+                const double w = weight ? case_data (c, weight)->f: 1.0;
+                if (var_is_value_missing (var, val, exclude))
+                  continue;
+
+                cc += w;
+                /* The writer keeps the case it is handed, while the
+                   loop header releases C after every iteration, so
+                   give the writer a reference of its own. */
+                casewriter_write (writer, case_ref (c));
+              }
+            subcase_destroy (&sc);
+            casereader_destroy (reader);
+            reader = casewriter_make_reader (writer);
+
+            median = percentile_create (0.5, cc);
+            os = &median->parent;
+
+            order_stats_accumulate (&os, 1,
+                                    reader,
+                                    weight,
+                                    var,
+                                    exclude);
+
+            run->cutpoint = percentile_calculate (median, PC_HAVERAGE);
+            statistic_destroy (&median->parent.parent);
+          }
+      }
+      break;
+    case CP_MEAN:
+      {
+        struct casereader *reader = casereader_clone (input);
+
+        /* One pass over the data, accumulating for each variable the
+           weighted sum in cutpoint and the sum of weights in n. */
+        for (; (c = casereader_read (reader)); case_unref (c))
+          {
+            const double w = weight ? case_data (c, weight)->f: 1.0;
+            for (v = 0; v < otp->n_vars; ++v)
+              {
+                const struct variable *var = otp->vars[v];
+                const union value *val = case_data (c, var);
+                const double x = val->f;
+                struct run_state *run = &rs[v];
+
+                if (var_is_value_missing (var, val, exclude))
+                  continue;
+
+                run->cutpoint += x * w;
+                run->n += w;
+              }
+          }
+        casereader_destroy (reader);
+
+        /* Turn each weighted sum into a weighted mean. */
+        for (v = 0; v < otp->n_vars; ++v)
+          {
+            struct run_state *run = &rs[v];
+            run->cutpoint /= run->n;
+          }
+      }
+      break;
+    case CP_CUSTOM:
+      {
+        /* The same user-supplied number applies to every variable. */
+        for (v = 0; v < otp->n_vars; ++v)
+          {
+            struct run_state *run = &rs[v];
+            run->cutpoint = rt->cutpoint;
+          }
+      }
+      break;
+    }
+
+  /* Phase 2: read the cases in their original order and count runs. */
+  for (; (c = casereader_read (input)); case_unref (c))
+    {
+      const double w = weight ? case_data (c, weight)->f: 1.0;
+
+      for (v = 0; v < otp->n_vars; ++v)
+        {
+          struct run_state *run = &rs[v];
+          const struct variable *var = otp->vars[v];
+          const union value *val = case_data (c, var);
+          double x = val->f;
+          double d = x - run->cutpoint;
+          short sign = 0;
+
+          if (var_is_value_missing (var, val, exclude))
+            continue;
+
+          if (d >= 0)
+            {
+              sign = +1;
+              run->np += w;
+            }
+          else
+            {
+              sign = -1;
+              run->nn += w;
+            }
+
+          /* A change of side starts a new run.  The very first case
+             always does, because last_sign starts at 0. */
+          if (sign != run->last_sign)
+            run->runs++;
+
+          run->last_sign = sign;
+        }
+    }
+  casereader_destroy (input);
+
+  for (v = 0; v < otp->n_vars; ++v)
+    {
+      struct run_state *run = &rs[v];
+      run->n = run->np + run->nn;
+    }
+
+  show_runs_result (rt, rs, dict);
+
+  free (rs);
+}
+
+\f
+#include <output/tab.h>
+
+/* Builds and submits the "Runs Test" table for the test RT.
+
+   RS is the array of per-variable results, parallel to the variable
+   list of RT.  DICT is consulted only for the weight variable, whose
+   print format (or F8.0 when the data are unweighted) is used for the
+   three case counts.
+
+   The table has one column per variable, preceded by a column of row
+   labels, and one row for each of: the test value, the cases below
+   it, the cases not below it, the total cases, the number of runs,
+   the Z statistic and its two-tailed asymptotic significance. */
+static void
+show_runs_result (const struct runs_test *rt, const struct run_state *rs, const struct dictionary *dict)
+{
+  const struct variable *weight = dict_get_weight (dict);
+  const struct fmt_spec *wfmt = weight ? var_get_print_format (weight) : &F_8_0;
+
+  const struct one_sample_test *otp = &rt->parent;
+
+  int i;
+  const int row_headers = 1;
+  const int column_headers = 1;
+  struct tab_table *table =
+    tab_create (row_headers + otp->n_vars, column_headers + 7);
+
+  tab_headers (table, row_headers, 0, column_headers, 0);
+
+  tab_title (table, _("Runs Test"));
+
+  /* Box around the table and vertical lines inside */
+  tab_box (table, TAL_2, TAL_2, -1, TAL_1,
+           0, 0, tab_nc (table) - 1, tab_nr (table) - 1);
+
+  tab_hline (table, TAL_2, 0, tab_nc (table) - 1, column_headers);
+  tab_vline (table, TAL_2, row_headers, 0, tab_nr (table) - 1);
+
+  /* One column of figures per variable. */
+  for (i = 0; i < otp->n_vars; ++i)
+    {
+      const struct run_state *run = &rs[i];
+
+      double z = runs_statistic (run);
+
+      tab_text (table, row_headers + i, 0,
+                TAT_TITLE | TAB_CENTER,
+                var_to_string (otp->vars[i]));
+
+      tab_double (table, row_headers + i, 1, 0,
+                  run->cutpoint, 0);
+
+      tab_double (table, row_headers + i, 2, 0,
+                  run->nn, wfmt);
+
+      tab_double (table, row_headers + i, 3, 0,
+                  run->np, wfmt);
+
+      tab_double (table, row_headers + i, 4, 0,
+                  run->n, wfmt);
+
+      tab_double (table, row_headers + i, 5, 0,
+                  run->runs, &F_8_0);
+
+      tab_double (table, row_headers + i, 6, 0,
+                  z, 0);
+
+      /* Two-tailed significance: twice the probability of the tail
+         beyond |z|.  Using the lower tail at -|z| keeps the result
+         within [0, 1] whatever the sign of z. */
+      tab_double (table, row_headers + i, 7, 0,
+                  2.0 * gsl_cdf_ugaussian_P (-fabs (z)), 0);
+    }
+
+  /* Row labels.  The first one says how the test value was chosen. */
+  switch (rt->cp_mode)
+    {
+    case CP_CUSTOM:
+      tab_text (table, 0, column_headers,
+                TAT_TITLE | TAB_LEFT, _("Test Value "));
+      break;
+    case CP_MODE:
+      tab_text (table, 0, column_headers,
+                TAT_TITLE | TAB_LEFT, _("Test Value (mode)"));
+      break;
+    case CP_MEAN:
+      tab_text (table, 0, column_headers,
+                TAT_TITLE | TAB_LEFT, _("Test Value (mean)"));
+      break;
+    case CP_MEDIAN:
+      tab_text (table, 0, column_headers,
+                TAT_TITLE | TAB_LEFT, _("Test Value (median)"));
+      break;
+    }
+
+  tab_text (table, 0, column_headers + 1,
+            TAT_TITLE | TAB_LEFT, _("Cases < Test Value"));
+
+  tab_text (table, 0, column_headers + 2,
+            TAT_TITLE | TAB_LEFT, _("Cases >= Test Value"));
+
+  tab_text (table, 0, column_headers + 3,
+            TAT_TITLE | TAB_LEFT, _("Total Cases"));
+
+  tab_text (table, 0, column_headers + 4,
+            TAT_TITLE | TAB_LEFT, _("Number of Runs"));
+
+  tab_text (table, 0, column_headers + 5,
+            TAT_TITLE | TAB_LEFT, _("Z"));
+
+  tab_text (table, 0, column_headers + 6,
+            TAT_TITLE | TAB_LEFT, _("Asymp. Sig. (2-tailed)"));
+
+  tab_submit (table);
+}
+
+