From 26a2fc80cb850d182030635c3720fbb6db40ae70 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Mon, 1 Nov 2004 04:51:21 +0000 Subject: [PATCH] Added test for ntiles. Fixed q2c parsing of integer/double subcommands --- po/en_GB.po | 61 +++++++++++------------- po/pspp.pot | 56 ++++++++++------------ src/ChangeLog | 7 +++ src/frequencies.q | 84 ++++++++++++++------------------ src/q2c.c | 37 +++++++++++---- src/set.q | 20 ++++---- src/t-test.q | 8 ++-- tests/Makefile.am | 1 + tests/stats/ntiles.sh | 108 ++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 245 insertions(+), 137 deletions(-) create mode 100755 tests/stats/ntiles.sh diff --git a/po/en_GB.po b/po/en_GB.po index 041fd622..f319976d 100644 --- a/po/en_GB.po +++ b/po/en_GB.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PSPP 0.3.1\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2004-10-30 17:37+0800\n" +"POT-Creation-Date: 2004-11-01 11:56+0800\n" "PO-Revision-Date: 2004-01-23 13:04+0800\n" "Last-Translator: John Darrington \n" "Language-Team: John Darrington \n" @@ -3808,7 +3808,7 @@ msgstr "" #: src/sysfile-info.c:529 src/vfm.c:875 src/crosstabs.q:1068 #: src/crosstabs.q:1095 src/crosstabs.q:1115 src/crosstabs.q:1137 -#: src/frequencies.q:1068 src/frequencies.q:1186 +#: src/frequencies.q:1058 src/frequencies.q:1176 msgid "Value" msgstr "" @@ -4033,26 +4033,26 @@ msgstr "" msgid "Cases" msgstr "" -#: src/crosstabs.q:772 src/frequencies.q:1066 src/frequencies.q:1433 +#: src/crosstabs.q:772 src/frequencies.q:1056 src/frequencies.q:1423 msgid "Valid" msgstr "" -#: src/crosstabs.q:773 src/frequencies.q:1133 src/frequencies.q:1434 +#: src/crosstabs.q:773 src/frequencies.q:1123 src/frequencies.q:1424 msgid "Missing" msgstr "" #: src/crosstabs.q:774 src/crosstabs.q:977 src/crosstabs.q:1690 -#: src/frequencies.q:1142 src/oneway.q:287 src/oneway.q:464 +#: src/frequencies.q:1132 src/oneway.q:287 src/oneway.q:464 msgid "Total" msgstr "" -#: src/crosstabs.q:784 
src/frequencies.q:1432 src/oneway.q:374 +#: src/crosstabs.q:784 src/frequencies.q:1422 src/oneway.q:374 #: src/t-test.q:680 src/t-test.q:703 src/t-test.q:828 src/t-test.q:1365 msgid "N" msgstr "" -#: src/crosstabs.q:785 src/frequencies.q:1070 src/frequencies.q:1071 -#: src/frequencies.q:1072 +#: src/crosstabs.q:785 src/frequencies.q:1060 src/frequencies.q:1061 +#: src/frequencies.q:1062 msgid "Percent" msgstr "" @@ -4324,85 +4324,76 @@ msgstr "" msgid "S.E. Skew" msgstr "" -#: src/frequencies.q:289 +#: src/frequencies.q:306 msgid "" "At most one of BARCHART, HISTOGRAM, or HBAR should be given. HBAR will be " "assumed. Argument values will be given precedence increasing along the " "order given." msgstr "" -#: src/frequencies.q:372 +#: src/frequencies.q:389 #, c-format msgid "" "MAX must be greater than or equal to MIN, if both are specified. However, " "MIN was specified as %g and MAX as %g. MIN and MAX will be ignored." msgstr "" -#: src/frequencies.q:696 +#: src/frequencies.q:713 msgid "" "Upper limit of integer mode value range must be greater than lower limit." msgstr "" -#: src/frequencies.q:708 +#: src/frequencies.q:725 #, c-format msgid "Variable %s specified multiple times on VARIABLES subcommand." msgstr "" -#: src/frequencies.q:721 +#: src/frequencies.q:738 #, c-format msgid "Integer mode specified, but %s is not a numeric variable." msgstr "" -#: src/frequencies.q:783 +#: src/frequencies.q:800 msgid "`)' expected after GROUPED interval list." msgstr "" -#: src/frequencies.q:796 +#: src/frequencies.q:813 #, c-format msgid "Variables %s specified on GROUPED but not on VARIABLES." msgstr "" -#: src/frequencies.q:799 +#: src/frequencies.q:816 #, c-format msgid "Variables %s specified multiple times on GROUPED subcommand." msgstr "" -#: src/frequencies.q:855 -msgid "Percentile list expected after PERCENTILES." -msgstr "" - -#: src/frequencies.q:863 -#, fuzzy -msgid "Percentiles must be between 0 and 100." -msgstr "Frame colour must be between 0 and 6." 
- -#: src/frequencies.q:1067 src/frequencies.q:1158 src/frequencies.q:1159 -#: src/frequencies.q:1189 +#: src/frequencies.q:1057 src/frequencies.q:1148 src/frequencies.q:1149 +#: src/frequencies.q:1179 msgid "Cum" msgstr "" -#: src/frequencies.q:1069 +#: src/frequencies.q:1059 msgid "Frequency" msgstr "" -#: src/frequencies.q:1088 +#: src/frequencies.q:1078 msgid "Value Label" msgstr "" -#: src/frequencies.q:1187 +#: src/frequencies.q:1177 msgid "Freq" msgstr "" -#: src/frequencies.q:1188 src/frequencies.q:1190 +#: src/frequencies.q:1178 src/frequencies.q:1180 msgid "Pct" msgstr "" -#: src/frequencies.q:1406 +#: src/frequencies.q:1396 #, c-format msgid "No valid data for variable %s; statistics not displayed." msgstr "" -#: src/frequencies.q:1445 +#: src/frequencies.q:1435 msgid "Percentiles" msgstr "" @@ -4846,3 +4837,7 @@ msgstr "" #, c-format msgid "%s & %s" msgstr "" + +#, fuzzy +#~ msgid "Percentiles must be between 0 and 100." +#~ msgstr "Frame colour must be between 0 and 6." diff --git a/po/pspp.pot b/po/pspp.pot index 86a951e7..b10d5db7 100644 --- a/po/pspp.pot +++ b/po/pspp.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2004-10-30 17:37+0800\n" +"POT-Creation-Date: 2004-11-01 11:56+0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -3808,7 +3808,7 @@ msgstr "" #: src/sysfile-info.c:529 src/vfm.c:875 src/crosstabs.q:1068 #: src/crosstabs.q:1095 src/crosstabs.q:1115 src/crosstabs.q:1137 -#: src/frequencies.q:1068 src/frequencies.q:1186 +#: src/frequencies.q:1058 src/frequencies.q:1176 msgid "Value" msgstr "" @@ -4033,26 +4033,26 @@ msgstr "" msgid "Cases" msgstr "" -#: src/crosstabs.q:772 src/frequencies.q:1066 src/frequencies.q:1433 +#: src/crosstabs.q:772 src/frequencies.q:1056 src/frequencies.q:1423 msgid "Valid" msgstr "" -#: src/crosstabs.q:773 src/frequencies.q:1133 src/frequencies.q:1434 +#: 
src/crosstabs.q:773 src/frequencies.q:1123 src/frequencies.q:1424 msgid "Missing" msgstr "" #: src/crosstabs.q:774 src/crosstabs.q:977 src/crosstabs.q:1690 -#: src/frequencies.q:1142 src/oneway.q:287 src/oneway.q:464 +#: src/frequencies.q:1132 src/oneway.q:287 src/oneway.q:464 msgid "Total" msgstr "" -#: src/crosstabs.q:784 src/frequencies.q:1432 src/oneway.q:374 +#: src/crosstabs.q:784 src/frequencies.q:1422 src/oneway.q:374 #: src/t-test.q:680 src/t-test.q:703 src/t-test.q:828 src/t-test.q:1365 msgid "N" msgstr "" -#: src/crosstabs.q:785 src/frequencies.q:1070 src/frequencies.q:1071 -#: src/frequencies.q:1072 +#: src/crosstabs.q:785 src/frequencies.q:1060 src/frequencies.q:1061 +#: src/frequencies.q:1062 msgid "Percent" msgstr "" @@ -4324,84 +4324,76 @@ msgstr "" msgid "S.E. Skew" msgstr "" -#: src/frequencies.q:289 +#: src/frequencies.q:306 msgid "" "At most one of BARCHART, HISTOGRAM, or HBAR should be given. HBAR will be " "assumed. Argument values will be given precedence increasing along the " "order given." msgstr "" -#: src/frequencies.q:372 +#: src/frequencies.q:389 #, c-format msgid "" "MAX must be greater than or equal to MIN, if both are specified. However, " "MIN was specified as %g and MAX as %g. MIN and MAX will be ignored." msgstr "" -#: src/frequencies.q:696 +#: src/frequencies.q:713 msgid "" "Upper limit of integer mode value range must be greater than lower limit." msgstr "" -#: src/frequencies.q:708 +#: src/frequencies.q:725 #, c-format msgid "Variable %s specified multiple times on VARIABLES subcommand." msgstr "" -#: src/frequencies.q:721 +#: src/frequencies.q:738 #, c-format msgid "Integer mode specified, but %s is not a numeric variable." msgstr "" -#: src/frequencies.q:783 +#: src/frequencies.q:800 msgid "`)' expected after GROUPED interval list." msgstr "" -#: src/frequencies.q:796 +#: src/frequencies.q:813 #, c-format msgid "Variables %s specified on GROUPED but not on VARIABLES." 
msgstr "" -#: src/frequencies.q:799 +#: src/frequencies.q:816 #, c-format msgid "Variables %s specified multiple times on GROUPED subcommand." msgstr "" -#: src/frequencies.q:855 -msgid "Percentile list expected after PERCENTILES." -msgstr "" - -#: src/frequencies.q:863 -msgid "Percentiles must be between 0 and 100." -msgstr "" - -#: src/frequencies.q:1067 src/frequencies.q:1158 src/frequencies.q:1159 -#: src/frequencies.q:1189 +#: src/frequencies.q:1057 src/frequencies.q:1148 src/frequencies.q:1149 +#: src/frequencies.q:1179 msgid "Cum" msgstr "" -#: src/frequencies.q:1069 +#: src/frequencies.q:1059 msgid "Frequency" msgstr "" -#: src/frequencies.q:1088 +#: src/frequencies.q:1078 msgid "Value Label" msgstr "" -#: src/frequencies.q:1187 +#: src/frequencies.q:1177 msgid "Freq" msgstr "" -#: src/frequencies.q:1188 src/frequencies.q:1190 +#: src/frequencies.q:1178 src/frequencies.q:1180 msgid "Pct" msgstr "" -#: src/frequencies.q:1406 +#: src/frequencies.q:1396 #, c-format msgid "No valid data for variable %s; statistics not displayed." msgstr "" -#: src/frequencies.q:1445 +#: src/frequencies.q:1435 msgid "Percentiles" msgstr "" diff --git a/src/ChangeLog b/src/ChangeLog index 11de18a8..69c756d4 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,10 @@ +Mon Nov 1 12:46:17 WST 2004 John Darrington + + * q2c.c frequencies.q set.q t-test.q Fixed the q2c parsing of DBL + subcommand types. Changed frequencies.q to use it rather than the + custom parser. Dealt with the consequences. Added a test for NTILES + subcommand of frequencies. 
+ Sat Oct 30 09:16:29 WST 2004 John Darrington * oneway.q Fixed up the behaviour when given missing values diff --git a/src/frequencies.q b/src/frequencies.q index 9eb17172..9a69e972 100644 --- a/src/frequencies.q +++ b/src/frequencies.q @@ -78,8 +78,8 @@ norm:!nonormal/normal, incr:increment(d:inc,"%s>0"); grouped=custom; - ntiles=custom; - percentiles=custom; + ntiles=integer; + +percentiles = double list; statistics[st_]=1|mean,2|semean,3|median,4|mode,5|stddev,6|variance, 7|kurtosis,8|skewness,9|range,10|minimum,11|maximum,12|sum, 13|default,14|seskewness,15|sekurtosis,all,none. @@ -126,6 +126,10 @@ struct percentile int flag2; /* Set to 1 if this percentile value has been found */ }; + +static void add_percentile (double x) ; + + static struct percentile *percentiles; static int n_percentiles; @@ -265,6 +269,28 @@ internal_cmd_frequencies (void) if (chart != GFT_NONE || cmd.sbc_ntiles) cmd.sort = FRQ_AVALUE; + /* Work out what percentiles need to be calculated */ + if ( cmd.sbc_percentiles ) + { + for ( i = 0 ; i < MAXLISTS ; ++i ) + { + int pl; + subc_list_double *ptl_list = &cmd.dl_percentiles[i]; + for ( pl = 0 ; pl < subc_list_double_count(ptl_list); ++pl) + add_percentile(subc_list_double_at(ptl_list,pl) / 100.0 ); + } + } + if ( cmd.sbc_ntiles ) + { + for ( i = 0 ; i < cmd.sbc_ntiles ; ++i ) + { + int j; + for (j = 0; j <= cmd.n_ntiles[i]; ++j ) + add_percentile(j / (double) cmd.n_ntiles[i]); + } + } + + /* Do it! */ procedure_with_splits (precalc, calc, postcalc, NULL); @@ -826,8 +852,14 @@ add_percentile (double x) int i; for (i = 0; i < n_percentiles; i++) - if (x <= percentiles[i].p) - break; + { + /* Do nothing if it's already in the list */ + if ( fabs(x - percentiles[i].p) < DBL_EPSILON ) + return; + + if (x < percentiles[i].p) + break; + } if (i >= n_percentiles || tokval != percentiles[i].p) { @@ -844,50 +876,6 @@ add_percentile (double x) } } -/* Parses the PERCENTILES subcommand, adding user-specified - percentiles to the list. 
*/ -static int -frq_custom_percentiles (struct cmd_frequencies *cmd UNUSED) -{ - lex_match ('='); - if (token != T_NUM) - { - msg (SE, _("Percentile list expected after PERCENTILES.")); - return 0; - } - - do - { - if (tokval < 0 || tokval > 100) - { - msg (SE, _("Percentiles must be between 0 and 100.")); - return 0; - } - - add_percentile (tokval / 100.0); - lex_get (); - lex_match (','); - } - while (token == T_NUM); - return 1; -} - -/* Parses the NTILES subcommand, adding the percentiles that - correspond to the specified evenly-distributed ntiles. */ -static int -frq_custom_ntiles (struct cmd_frequencies *cmd UNUSED) -{ - int i; - - lex_match ('='); - if (!lex_force_int ()) - return 0; - for (i = 1; i < lex_integer (); i++) - add_percentile (1.0 / lex_integer () * i); - lex_get (); - return 1; -} - /* Comparison functions. */ /* Hash of numeric values. */ diff --git a/src/q2c.c b/src/q2c.c index e8f09d8d..39a6cc4e 100644 --- a/src/q2c.c +++ b/src/q2c.c @@ -1137,11 +1137,11 @@ dump_declarations (void) case SBC_INT: case SBC_PINT: - dump (0, "long n_%s;", st_lower (sbc->name)); + dump (0, "long n_%s[MAXLISTS];", st_lower (sbc->name)); break; case SBC_DBL: - dump (0, "double n_%s;", st_lower (sbc->name)); + dump (0, "double n_%s[MAXLISTS];", st_lower (sbc->name)); break; case SBC_DBL_LIST: @@ -1254,16 +1254,23 @@ dump_vars_init (int persistent) break; case SBC_DBL_LIST: - dump (0, "int i;"); - dump (0, "for (i = 0; i < MAXLISTS; ++i)"); dump (1, "{"); + dump (0, "int i;"); + dump (1, "for (i = 0; i < MAXLISTS; ++i)"); dump (0, "subc_list_double_create(&p->dl_%s[i]) ;", st_lower (sbc->name) ); - dump (-1, "}"); + dump (-2, "}"); break; case SBC_DBL: + dump (1, "{"); + dump (0, "int i;"); + dump (1, "for (i = 0; i < MAXLISTS; ++i)"); + dump (0, "p->n_%s[i] = SYSMIS;", st_lower (sbc->name)); + dump (-2, "}"); + break; + case SBC_CUSTOM: /* nothing */ break; @@ -1308,7 +1315,11 @@ dump_vars_init (int persistent) case SBC_INT: case SBC_PINT: - dump (0, "p->n_%s = 
NOT_LONG;", st_lower (sbc->name)); + dump (1, "{"); + dump (0, "int i;"); + dump (1, "for (i = 0; i < MAXLISTS; ++i)"); + dump (0, "p->n_%s[i] = NOT_LONG;", st_lower (sbc->name)); + dump (-2, "}"); break; default: @@ -1614,7 +1625,8 @@ dump_subcommand (const subcommand *sbc) { dump (1, "if (!lex_force_num ())"); dump (0, "goto lossage;"); - dump (-1, "p->n_%s = lex_double ();", st_lower (sbc->name)); + dump (-1, "p->n_%s[p->sbc_%s - 1] = lex_double ();", + st_lower (sbc->name), st_lower (sbc->name) ); dump (0, "lex_get();"); } else if (sbc->type == SBC_INT) @@ -1638,7 +1650,7 @@ dump_subcommand (const subcommand *sbc) dump (0, "goto lossage;"); dump (-1, "}"); } - dump (-1, "p->n_%s = x;", st_lower (sbc->name)); + dump (0, "p->n_%s[p->sbc_%s - 1] = x;", st_lower (sbc->name), st_lower(sbc->name) ); dump (-1,"}"); } else if (sbc->type == SBC_PINT) @@ -1848,7 +1860,12 @@ dump_aux_subcommand (const subcommand *sbc) } else if (sbc->type == SBC_INT) { - dump (0, "msg(MM,\"%s is %%ld\",p->n_%s);", sbc->name,st_lower(sbc->name) ); + dump (1, "{"); + dump (0, "int i;"); + dump (1, "for (i = 0; i < MAXLISTS; ++i)"); + dump (0, "msg(MM,\"%s is %%ld\",p->n_%s[i]);", sbc->name,st_lower(sbc->name) ); + outdent(); + dump (-1, "}"); } else if (sbc->type == SBC_CUSTOM) { @@ -2105,7 +2122,7 @@ main (int argc, char *argv[]) continue; } - dump (0, "#line %d \"%s\"", oln - 1, ofn); + dump (0, "#line %d \"%s\"", oln + 1, ofn); if (!strcmp (directive, "specification")) { /* Skip leading slash-star line. 
*/ diff --git a/src/set.q b/src/set.q index 3c1b38d5..4c2d66a3 100644 --- a/src/set.q +++ b/src/set.q @@ -1063,8 +1063,8 @@ init_settings(void) cmd.safe = STC_OFF; cmd.dec = STC_DOT; - cmd.n_cpi = 6; - cmd.n_lpi = 10; + cmd.n_cpi[0] = 6; + cmd.n_lpi[0] = 10; cmd.echo = STC_OFF; cmd.more = STC_ON; cmd.headers = STC_YES; @@ -1080,10 +1080,10 @@ init_settings(void) set_journal = xstrdup ("pspp.jnl"); set_journaling = 1; - cmd.n_mxwarns = 100; - cmd.n_mxerrs = 100; - cmd.n_mxloops = 1; - cmd.n_workspace = 4L * 1024 * 1024; + cmd.n_mxwarns[0] = 100; + cmd.n_mxerrs[0] = 100; + cmd.n_mxloops[0] = 1; + cmd.n_workspace[0] = 4L * 1024 * 1024; #if !USE_INTERNAL_PAGER @@ -1211,13 +1211,13 @@ get_undefined(void) int get_mxwarns(void) { - return cmd.n_mxwarns; + return cmd.n_mxwarns[0]; } int get_mxerrs(void) { - return cmd.n_mxerrs; + return cmd.n_mxerrs[0]; } int @@ -1235,7 +1235,7 @@ get_printback(void) int get_mxloops(void) { - return cmd.n_mxloops; + return cmd.n_mxloops[0]; } int @@ -1260,7 +1260,7 @@ get_endcmd(void) size_t get_max_workspace(void) { - return cmd.n_workspace; + return cmd.n_workspace[0]; } double diff --git a/src/t-test.q b/src/t-test.q index 44dd502e..ab25065f 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -46,7 +46,7 @@ /* (specification) "T-TEST" (tts_): +groups=custom; - +testval=double; + testval=double; variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); pairs=custom; +missing=miss:!analysis/listwise, @@ -1248,7 +1248,7 @@ trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) tab_vline(self->t, TAL_2, 1, 0, vsize - 1); tab_joint_text(self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF, - _("Test Value = %f"),cmd->n_testval); + _("Test Value = %f"), cmd->n_testval[0]); tab_box(self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1); @@ -1288,7 +1288,7 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) tab_text (trb->t, 0, i+3, TAB_LEFT, cmd->v_variables[i]->name); - t = (gs->mean - cmd->n_testval ) * sqrt(gs->n) / gs->std_dev ; 
+ t = (gs->mean - cmd->n_testval[0] ) * sqrt(gs->n) / gs->std_dev ; tab_float (trb->t, 1, i+3, TAB_RIGHT, t, 8,3); @@ -1536,7 +1536,7 @@ one_sample_calc (const struct ccase *c, void *cmd_) gs= &cmd->v_variables[i]->p.grp_data.ugs; if ( ! value_is_missing(val,v)) - gs->sum_diff += weight * (val->f - cmd->n_testval); + gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } return 0; diff --git a/tests/Makefile.am b/tests/Makefile.am index f651aa43..ed566ef4 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -57,6 +57,7 @@ TESTS = \ stats/descript-mean-bug.sh \ stats/moments.sh \ stats/percentiles-compatible.sh \ + stats/ntiles.sh \ stats/percentiles-enhanced.sh noinst_PROGRAMS = gengarbage diff --git a/tests/stats/ntiles.sh b/tests/stats/ntiles.sh new file mode 100755 index 00000000..35d4af73 --- /dev/null +++ b/tests/stats/ntiles.sh @@ -0,0 +1,108 @@ +#! /bin/sh + +# Tests the NTILE subcommand of the frequencies command + +TEMPDIR=/tmp/pspp-tst-$$ + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +export STAT_CONFIG_PATH=$top_srcdir/config + + +cleanup() +{ + rm -rf $TEMPDIR + : +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + + +i=1; + +activity="create program $i" +cat > $TEMPDIR/prog.sps < $TEMPDIR/prog.sps <