From 1d985886f778e35f8d89c4e3c897b79fde8de6ed Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 13 Mar 2005 07:31:53 +0000 Subject: [PATCH] Start work on testing and debugging AGGREGATE. --- src/ChangeLog | 18 ++++++ src/aggregate.c | 119 +++++++++++++++++++++++-------------- src/sort.c | 16 +++-- src/sort.h | 4 +- tests/ChangeLog | 5 ++ tests/bugs/agg-crash-2.sh | 2 +- tests/bugs/big-input-2.sh | 2 +- tests/command/aggregate.sh | 2 +- 8 files changed, 115 insertions(+), 53 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index e7296ad5..c559d6ad 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,21 @@ +Sat Mar 12 23:26:21 2005 Ben Pfaff + + Start work on testing and debugging AGGREGATE. + + * aggregate.c: (cmd_aggregate) Use discrete bool variables instead + of a bit-map. Require proper subcommand ordering. Make OUTFILE + subcommand mandatory. + (parse_aggregate_functions) Check that PIN, POUT, FIN, FOUT + functions' arguments are in the correct order and swap them if + not. + (accumulate_aggregate_info) Make SUM include weights. Add various + string functions. + (dump_aggregate_info) Add various string functions. + (initialize_aggregate_info) Initialize int1 for MIN, MAX. + + * sort.c: (sort_parse_criteria) Add parameter for returning + whether any directions were specified. All callers updated. + Sun Mar 13 14:54:27 WST 2005 John Darrington * t-test.q: Fixed erroneous logic in compare_group_binary. diff --git a/src/aggregate.c b/src/aggregate.c index 3037a150..9d925707 100644 --- a/src/aggregate.c +++ b/src/aggregate.c @@ -158,8 +158,9 @@ cmd_aggregate (void) struct agr_proc agr; struct file_handle *out_file = NULL; - /* Have we seen these subcommands? */ - unsigned seen = 0; + bool copy_documents = false; + bool presorted = false; + bool saw_direction; memset(&agr, 0 , sizeof (agr)); agr.missing = ITEMWISE; @@ -167,30 +168,24 @@ cmd_aggregate (void) agr.dict = dict_create (); dict_set_label (agr.dict, dict_get_label (default_dict)); dict_set_documents (agr.dict, dict_get_documents (default_dict)); + + /* OUTFILE subcommand must be first. */ + if (!lex_force_match_id ("OUTFILE")) + goto error; + lex_match ('='); + if (!lex_match ('*')) + { + out_file = fh_parse (); + if (out_file == NULL) + goto error; + } /* Read most of the subcommands. */ for (;;) { lex_match ('/'); - if (lex_match_id ("OUTFILE")) - { - if (seen & 1) - { - msg (SE, _("%s subcommand given multiple times."),"OUTFILE"); - goto error; - } - seen |= 1; - - lex_match ('='); - if (!lex_match ('*')) - { - out_file = fh_parse (); - if (out_file == NULL) - goto error; - } - } - else if (lex_match_id ("MISSING")) + if (lex_match_id ("MISSING")) { lex_match ('='); if (!lex_match_id ("COLUMNWISE")) @@ -201,23 +196,17 @@ cmd_aggregate (void) agr.missing = COLUMNWISE; } else if (lex_match_id ("DOCUMENT")) - seen |= 2; + copy_documents = true; else if (lex_match_id ("PRESORTED")) - seen |= 4; + presorted = true; else if (lex_match_id ("BREAK")) { int i; - if (seen & 8) - { - msg (SE, _("%s subcommand given multiple times."),"BREAK"); - goto error; - } - seen |= 8; - lex_match ('='); agr.sort = sort_parse_criteria (default_dict, - &agr.break_vars, &agr.break_var_cnt); + &agr.break_vars, &agr.break_var_cnt, + &saw_direction); if (agr.sort == NULL) goto error; @@ -227,20 +216,28 @@ cmd_aggregate (void) agr.break_vars[i]->name); assert (v != NULL); } + + /* BREAK must follow the options. */ + break; } - else break; + else + { + lex_error (_("expecting BREAK")); + goto error; + } } - - /* Check for proper syntax. */ - if (!(seen & 8)) - msg (SW, _("BREAK subcommand not specified.")); + if (presorted && saw_direction) + msg (SW, _("When PRESORTED is specified, specifying sorting directions " + "with (A) or (D) has no effect. Output data will be sorted " + "the same way as the input data.")); /* Read in the aggregate functions. */ + lex_match ('/'); if (!parse_aggregate_functions (&agr)) goto error; /* Delete documents. */ - if (!(seen & 2)) + if (!copy_documents) dict_set_documents (agr.dict, NULL); /* Cancel SPLIT FILE. */ @@ -258,7 +255,7 @@ cmd_aggregate (void) so TEMPORARY is moot. */ cancel_temporary (); - if (agr.sort != NULL && (seen & 4) == 0) + if (agr.sort != NULL && !presorted) sort_active_file_in_place (agr.sort); agr.sink = create_case_sink (&storage_sink_class, agr.dict, NULL); @@ -283,7 +280,7 @@ cmd_aggregate (void) if (agr.writer == NULL) goto error; - if (agr.sort != NULL && (seen & 4) == 0) + if (agr.sort != NULL && !presorted) { /* Sorting is needed. */ struct casefile *dst; @@ -477,12 +474,12 @@ parse_aggregate_functions (struct agr_proc *agr) goto error; } - /* Now check that the number of source variables match the - number of target variables. Do this here because if we - do it earlier then the user can get very misleading error - messages; i.e., `AGGREGATE x=SUM(y t).' will get this - error message when a proper message would be more like - `unknown variable t'. */ + /* Now check that the number of source variables match + the number of target variables. If we check earlier + than this, the user can get very misleading error + message, i.e. `AGGREGATE x=SUM(y t).' will get this + error message when a proper message would be more + like `unknown variable t'. */ if (n_src != n_dest) { msg (SE, _("Number of source variables (%d) does not match " @@ -490,6 +487,23 @@ parse_aggregate_functions (struct agr_proc *agr) n_src, n_dest); goto error; } + + if ((func_index == PIN || func_index == POUT + || func_index == FIN || func_index == FOUT) + && ((src[0]->type == NUMERIC && arg[0].f > arg[1].f) + || (src[0]->type == ALPHA + && st_compare_pad (arg[0].c, strlen (arg[0].c), + arg[1].c, strlen (arg[1].c)) > 0))) + { + union value t = arg[0]; + arg[0] = arg[1]; + arg[1] = t; + + msg (SW, _("The value arguments passed to the %s function " + "are out-of-order. They will be treated as if " + "they had been specified in the correct order."), + function->name); + } } /* Finally add these to the linked list of aggregation @@ -817,7 +831,7 @@ accumulate_aggregate_info (struct agr_proc *agr, switch (iter->function) { case SUM: - iter->dbl[0] += v->f; + iter->dbl[0] += v->f * weight; break; case MEAN: iter->dbl[0] += v->f * weight; @@ -895,9 +909,11 @@ accumulate_aggregate_info (struct agr_proc *agr, iter->dbl[1] += weight; break; case N: + case N | FSTRING: iter->dbl[0] += weight; break; case NU: + case NU | FSTRING: iter->int1++; break; case FIRST: @@ -922,6 +938,13 @@ accumulate_aggregate_info (struct agr_proc *agr, memcpy (iter->string, v->s, iter->src->width); iter->int1 = 1; break; + case NMISS: + case NMISS | FSTRING: + case NUMISS: + case NUMISS | FSTRING: + /* Our value is not missing or it would have been + caught earlier. Nothing to do. */ + break; default: assert (0); } @@ -1033,9 +1056,11 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS; break; case N: + case N | FSTRING: v->f = i->dbl[0]; break; case NU: + case NU | FSTRING: v->f = i->int1; break; case FIRST: @@ -1056,9 +1081,11 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) v->f = i->int1; break; case NMISS: + case NMISS | FSTRING: v->f = i->dbl[0]; break; case NUMISS: + case NUMISS | FSTRING: v->f = i->int1; break; default: @@ -1081,12 +1108,14 @@ initialize_aggregate_info (struct agr_proc *agr) { case MIN: iter->dbl[0] = DBL_MAX; + iter->int1 = 0; break; case MIN | FSTRING: memset (iter->string, 255, iter->src->width); break; case MAX: iter->dbl[0] = -DBL_MAX; + iter->int1 = 0; break; case MAX | FSTRING: memset (iter->string, 0, iter->src->width); diff --git a/src/sort.c b/src/sort.c index a77c5a82..6e518bef 100644 --- a/src/sort.c +++ b/src/sort.c @@ -25,6 +25,7 @@ #include #include "algorithm.h" #include "alloc.h" +#include "bool.h" #include "case.h" #include "casefile.h" #include "command.h" @@ -91,7 +92,7 @@ cmd_sort_cases (void) lex_match (T_BY); - criteria = sort_parse_criteria (default_dict, NULL, NULL); + criteria = sort_parse_criteria (default_dict, NULL, NULL, NULL); if (criteria == NULL) return CMD_FAILURE; @@ -151,11 +152,15 @@ sort_active_file_to_casefile (const struct sort_criteria *criteria) return sort_execute (casefile_get_reader (src), criteria); } -/* Parses a list of sort keys and returns a struct sort_cases_pgm - based on it. Returns a null pointer on error. */ +/* Parses a list of sort keys and returns a struct sort_criteria + based on it. Returns a null pointer on error. + If SAW_DIRECTION is nonnull, sets *SAW_DIRECTION to true if at + least one parenthesized sort direction was specified, false + otherwise. */ struct sort_criteria * sort_parse_criteria (const struct dictionary *dict, - struct variable ***vars, int *var_cnt) + struct variable ***vars, int *var_cnt, + bool *saw_direction) { struct sort_criteria *criteria; struct variable **local_vars = NULL; @@ -174,6 +179,8 @@ sort_parse_criteria (const struct dictionary *dict, *vars = NULL; *var_cnt = 0; + if (saw_direction != NULL) + *saw_direction = false; do { @@ -202,6 +209,7 @@ sort_parse_criteria (const struct dictionary *dict, msg (SE, _("`)' expected.")); goto error; } + *saw_direction = true; } else direction = SRT_ASCEND; diff --git a/src/sort.h b/src/sort.h index 63054413..c8f97af0 100644 --- a/src/sort.h +++ b/src/sort.h @@ -21,13 +21,15 @@ #define sort_h 1 #include +#include "bool.h" struct casereader; struct dictionary; struct variable; struct sort_criteria *sort_parse_criteria (const struct dictionary *, - struct variable ***, int *); + struct variable ***, int *, + bool *saw_direction); void sort_destroy_criteria (struct sort_criteria *); struct casefile *sort_execute (struct casereader *, diff --git a/tests/ChangeLog b/tests/ChangeLog index 26fdf957..bbc4a7f2 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,8 @@ +Sat Mar 12 23:30:37 2005 Ben Pfaff + + * bugs/agg-crash-2.sh, bugs/big-input-2.sh, command/aggregate.sh: + Fix AGGREGATE command syntax. + Sat Mar 12 13:16:34 2005 Ben Pfaff * bugs/temp-freq.sh: Add another test. diff --git a/tests/bugs/agg-crash-2.sh b/tests/bugs/agg-crash-2.sh index 384c3c80..01aa218b 100755 --- a/tests/bugs/agg-crash-2.sh +++ b/tests/bugs/agg-crash-2.sh @@ -59,7 +59,7 @@ END DATA. -AGGREGATE /BREAK=y /x=MAX(x). +AGGREGATE OUTFILE=* /BREAK=y /x=MAX(x). LIST /x y. EOF if [ $? -ne 0 ] ; then no_result ; fi diff --git a/tests/bugs/big-input-2.sh b/tests/bugs/big-input-2.sh index cb89adf4..9f6cd341 100755 --- a/tests/bugs/big-input-2.sh +++ b/tests/bugs/big-input-2.sh @@ -71,7 +71,7 @@ cat > $TESTFILE <