X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Faggregate.c;h=5979204d08c0c407a078f2ca1ad367665ff0ed67;hb=93abfcc57a1e6ff792e5ae01784172459eee783f;hp=3037a150fbb07e92dea17ec284b42302d042a19b;hpb=4848cff524922cc77ed21662406807471e96a68e;p=pspp-builds.git diff --git a/src/aggregate.c b/src/aggregate.c index 3037a150..5979204d 100644 --- a/src/aggregate.c +++ b/src/aggregate.c @@ -14,8 +14,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include #include "error.h" @@ -33,12 +33,16 @@ #include "pool.h" #include "settings.h" #include "sfm-write.h" +#include "sort-prs.h" #include "sort.h" #include "str.h" #include "var.h" #include "vfm.h" #include "vfmP.h" +#include "gettext.h" +#define _(msgid) gettext (msgid) + /* Specifies how to make an aggregate variable. */ struct agr_var { @@ -124,7 +128,7 @@ struct agr_proc struct sort_criteria *sort; /* Sort criteria. */ struct variable **break_vars; /* Break variables. */ size_t break_var_cnt; /* Number of break variables. */ - union value *prev_break; /* Last values of break variables. */ + struct ccase break_case; /* Last values of break variables. */ enum missing_treatment missing; /* How to treat missing values. */ struct agr_var *agr_vars; /* First aggregate variable. */ @@ -133,7 +137,8 @@ struct agr_proc struct ccase agr_case; /* Aggregate case for output. */ }; -static void initialize_aggregate_info (struct agr_proc *); +static void initialize_aggregate_info (struct agr_proc *, + const struct ccase *); /* Prototypes. */ static int parse_aggregate_functions (struct agr_proc *); @@ -158,39 +163,35 @@ cmd_aggregate (void) struct agr_proc agr; struct file_handle *out_file = NULL; - /* Have we seen these subcommands? */ - unsigned seen = 0; + bool copy_documents = false; + bool presorted = false; + bool saw_direction; memset(&agr, 0 , sizeof (agr)); agr.missing = ITEMWISE; + case_nullify (&agr.break_case); agr.dict = dict_create (); dict_set_label (agr.dict, dict_get_label (default_dict)); dict_set_documents (agr.dict, dict_get_documents (default_dict)); + + /* OUTFILE subcommand must be first. */ + if (!lex_force_match_id ("OUTFILE")) + goto error; + lex_match ('='); + if (!lex_match ('*')) + { + out_file = fh_parse (); + if (out_file == NULL) + goto error; + } /* Read most of the subcommands. */ for (;;) { lex_match ('/'); - if (lex_match_id ("OUTFILE")) - { - if (seen & 1) - { - msg (SE, _("%s subcommand given multiple times."),"OUTFILE"); - goto error; - } - seen |= 1; - - lex_match ('='); - if (!lex_match ('*')) - { - out_file = fh_parse (); - if (out_file == NULL) - goto error; - } - } - else if (lex_match_id ("MISSING")) + if (lex_match_id ("MISSING")) { lex_match ('='); if (!lex_match_id ("COLUMNWISE")) @@ -201,46 +202,45 @@ cmd_aggregate (void) agr.missing = COLUMNWISE; } else if (lex_match_id ("DOCUMENT")) - seen |= 2; + copy_documents = true; else if (lex_match_id ("PRESORTED")) - seen |= 4; + presorted = true; else if (lex_match_id ("BREAK")) { int i; - if (seen & 8) - { - msg (SE, _("%s subcommand given multiple times."),"BREAK"); - goto error; - } - seen |= 8; - lex_match ('='); agr.sort = sort_parse_criteria (default_dict, - &agr.break_vars, &agr.break_var_cnt); + &agr.break_vars, &agr.break_var_cnt, + &saw_direction, NULL); if (agr.sort == NULL) goto error; for (i = 0; i < agr.break_var_cnt; i++) - { - struct variable *v = dict_clone_var (agr.dict, agr.break_vars[i], - agr.break_vars[i]->name); - assert (v != NULL); - } + dict_clone_var_assert (agr.dict, agr.break_vars[i], + agr.break_vars[i]->name); + + /* BREAK must follow the options. */ + break; } - else break; + else + { + lex_error (_("expecting BREAK")); + goto error; + } } - - /* Check for proper syntax. */ - if (!(seen & 8)) - msg (SW, _("BREAK subcommand not specified.")); + if (presorted && saw_direction) + msg (SW, _("When PRESORTED is specified, specifying sorting directions " + "with (A) or (D) has no effect. Output data will be sorted " + "the same way as the input data.")); /* Read in the aggregate functions. */ + lex_match ('/'); if (!parse_aggregate_functions (&agr)) goto error; /* Delete documents. */ - if (!(seen & 2)) + if (!copy_documents) dict_set_documents (agr.dict, NULL); /* Cancel SPLIT FILE. */ @@ -249,7 +249,6 @@ cmd_aggregate (void) /* Initialize. */ agr.case_cnt = 0; case_create (&agr.agr_case, dict_get_next_value_idx (agr.dict)); - initialize_aggregate_info (&agr); /* Output to active file or external file? */ if (out_file == NULL) @@ -258,7 +257,7 @@ cmd_aggregate (void) so TEMPORARY is moot. */ cancel_temporary (); - if (agr.sort != NULL && (seen & 4) == 0) + if (agr.sort != NULL && !presorted) sort_active_file_in_place (agr.sort); agr.sink = create_case_sink (&storage_sink_class, agr.dict, NULL); @@ -279,11 +278,12 @@ cmd_aggregate (void) } else { - agr.writer = sfm_open_writer (out_file, agr.dict, get_scompression ()); + agr.writer = sfm_open_writer (out_file, agr.dict, + sfm_writer_default_options ()); if (agr.writer == NULL) goto error; - if (agr.sort != NULL && (seen & 4) == 0) + if (agr.sort != NULL && !presorted) { /* Sorting is needed. */ struct casefile *dst; @@ -399,7 +399,7 @@ parse_aggregate_functions (struct agr_proc *agr) } for (function = agr_func_tab; function->name; function++) - if (!strcmp (function->name, tokid)) + if (!strcasecmp (function->name, tokid)) break; if (NULL == function->name) { @@ -455,7 +455,8 @@ parse_aggregate_functions (struct agr_proc *agr) arg[i].f = tokval; type = NUMERIC; } else { - msg (SE, _("Missing argument %d to %s."), i + 1, function->name); + msg (SE, _("Missing argument %d to %s."), i + 1, + function->name); goto error; } @@ -477,12 +478,12 @@ parse_aggregate_functions (struct agr_proc *agr) goto error; } - /* Now check that the number of source variables match the - number of target variables. Do this here because if we - do it earlier then the user can get very misleading error - messages; i.e., `AGGREGATE x=SUM(y t).' will get this - error message when a proper message would be more like - `unknown variable t'. */ + /* Now check that the number of source variables match + the number of target variables. If we check earlier + than this, the user can get very misleading error + message, i.e. `AGGREGATE x=SUM(y t).' will get this + error message when a proper message would be more + like `unknown variable t'. */ if (n_src != n_dest) { msg (SE, _("Number of source variables (%d) does not match " @@ -490,6 +491,22 @@ parse_aggregate_functions (struct agr_proc *agr) n_src, n_dest); goto error; } + + if ((func_index == PIN || func_index == POUT + || func_index == FIN || func_index == FOUT) + && ((src[0]->type == NUMERIC && arg[0].f > arg[1].f) + || (src[0]->type == ALPHA + && str_compare_rpad (arg[0].c, arg[1].c) > 0))) + { + union value t = arg[0]; + arg[0] = arg[1]; + arg[1] = t; + + msg (SW, _("The value arguments passed to the %s function " + "are out-of-order. They will be treated as if " + "they had been specified in the correct order."), + function->name); + } } /* Finally add these to the linked list of aggregation @@ -526,29 +543,28 @@ parse_aggregate_functions (struct agr_proc *agr) if (function->alpha_type == ALPHA) destvar = dict_clone_var (agr->dict, v->src, dest[i]); - else if (v->src->type == NUMERIC - || function->alpha_type == NUMERIC) + else { + assert (v->src->type == NUMERIC + || function->alpha_type == NUMERIC); destvar = dict_create_var (agr->dict, dest[i], 0); - - if ((func_index == N - || func_index == N_NO_VARS - || func_index == NMISS) - && dict_get_weight (default_dict) != NULL) + if (destvar != NULL) { - static const struct fmt_spec f8_2 = {FMT_F, 8, 2}; - - destvar->print = destvar->write = f8_2; + if ((func_index == N || func_index == NMISS) + && dict_get_weight (default_dict) != NULL) + destvar->print = destvar->write = f8_2; + else + destvar->print = destvar->write = function->format; } - else - destvar->print = destvar->write = function->format; } - else - destvar = dict_create_var (agr->dict, dest[i], - v->src->width); } else { v->src = NULL; destvar = dict_create_var (agr->dict, dest[i], 0); + if (func_index == N_NO_VARS + && dict_get_weight (default_dict) != NULL) + destvar->print = destvar->write = f8_2; + else + destvar->print = destvar->write = function->format; } if (!destvar) @@ -558,7 +574,6 @@ parse_aggregate_functions (struct agr_proc *agr) "the aggregate variables and the break " "variables."), dest[i]); - free (dest[i]); goto error; } @@ -641,7 +656,7 @@ agr_destroy (struct agr_proc *agr) if (agr->sort != NULL) sort_destroy_criteria (agr->sort); free (agr->break_vars); - free (agr->prev_break); + case_destroy (&agr->break_case); for (iter = agr->agr_vars; iter; iter = next) { next = iter->next; @@ -679,105 +694,21 @@ static int aggregate_single_case (struct agr_proc *agr, const struct ccase *input, struct ccase *output) { - /* The first case always begins a new break group. We also need to - preserve the values of the case for later comparison. */ + bool finished_group = false; + if (agr->case_cnt++ == 0) + initialize_aggregate_info (agr, input); + else if (case_compare (&agr->break_case, input, + agr->break_vars, agr->break_var_cnt)) { - int n_elem = 0; - - { - int i; + dump_aggregate_info (agr, output); + finished_group = true; - for (i = 0; i < agr->break_var_cnt; i++) - n_elem += agr->break_vars[i]->nv; - } - - agr->prev_break = xmalloc (sizeof *agr->prev_break * n_elem); - - /* Copy INPUT into prev_break. */ - { - union value *iter = agr->prev_break; - int i; - - for (i = 0; i < agr->break_var_cnt; i++) - { - struct variable *v = agr->break_vars[i]; - - if (v->type == NUMERIC) - (iter++)->f = case_num (input, v->fv); - else - { - memcpy (iter->s, case_str (input, v->fv), v->width); - iter += v->nv; - } - } - } - - accumulate_aggregate_info (agr, input); - - return 0; + initialize_aggregate_info (agr, input); } - - /* Compare the value of each break variable to the values on the - previous case. */ - { - union value *iter = agr->prev_break; - int i; - - for (i = 0; i < agr->break_var_cnt; i++) - { - struct variable *v = agr->break_vars[i]; - - switch (v->type) - { - case NUMERIC: - if (case_num (input, v->fv) != iter->f) - goto not_equal; - iter++; - break; - case ALPHA: - if (memcmp (case_str (input, v->fv), iter->s, v->width)) - goto not_equal; - iter += v->nv; - break; - default: - assert (0); - } - } - } accumulate_aggregate_info (agr, input); - - return 0; - -not_equal: - /* The values of the break variable are different from the values on - the previous case. That means that it's time to dump aggregate - info. */ - dump_aggregate_info (agr, output); - initialize_aggregate_info (agr); - accumulate_aggregate_info (agr, input); - - /* Copy INPUT into prev_break. */ - { - union value *iter = agr->prev_break; - int i; - - for (i = 0; i < agr->break_var_cnt; i++) - { - struct variable *v = agr->break_vars[i]; - - if (v->type == NUMERIC) - (iter++)->f = case_num (input, v->fv); - else - { - memcpy (iter->s, case_str (input, v->fv), v->width); - iter += v->nv; - } - } - } - - return 1; + return finished_group; } /* Accumulates aggregation data from the case INPUT. */ @@ -796,16 +727,19 @@ accumulate_aggregate_info (struct agr_proc *agr, { const union value *v = case_data (input, iter->src->fv); - if ((!iter->include_missing && is_missing (v, iter->src)) + if ((!iter->include_missing + && mv_is_value_missing (&iter->src->miss, v)) || (iter->include_missing && iter->src->type == NUMERIC && v->f == SYSMIS)) { switch (iter->function) { case NMISS: + case NMISS | FSTRING: iter->dbl[0] += weight; break; case NUMISS: + case NUMISS | FSTRING: iter->int1++; break; } @@ -817,7 +751,8 @@ accumulate_aggregate_info (struct agr_proc *agr, switch (iter->function) { case SUM: - iter->dbl[0] += v->f; + iter->dbl[0] += v->f * weight; + iter->int1 = 1; break; case MEAN: iter->dbl[0] += v->f * weight; @@ -890,14 +825,16 @@ accumulate_aggregate_info (struct agr_proc *agr, case FOUT | FSTRING: case POUT | FSTRING: if (memcmp (iter->arg[0].c, v->s, iter->src->width) > 0 - && memcmp (iter->arg[1].c, v->s, iter->src->width) < 0) + || memcmp (iter->arg[1].c, v->s, iter->src->width) < 0) iter->dbl[0] += weight; iter->dbl[1] += weight; break; case N: + case N | FSTRING: iter->dbl[0] += weight; break; case NU: + case NU | FSTRING: iter->int1++; break; case FIRST: @@ -922,6 +859,13 @@ accumulate_aggregate_info (struct agr_proc *agr, memcpy (iter->string, v->s, iter->src->width); iter->int1 = 1; break; + case NMISS: + case NMISS | FSTRING: + case NUMISS: + case NUMISS | FSTRING: + /* Our value is not missing or it would have been + caught earlier. Nothing to do. */ + break; default: assert (0); } @@ -952,11 +896,11 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) for (i = 0; i < agr->break_var_cnt; i++) { - int nv = agr->break_vars[i]->nv; + struct variable *v = agr->break_vars[i]; memcpy (case_data_rw (output, value_idx), - &agr->prev_break[value_idx], - sizeof (union value) * nv); - value_idx += nv; + case_data (&agr->break_case, v->fv), + sizeof (union value) * v->nv); + value_idx += v->nv; } } @@ -971,7 +915,7 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) && (i->function & FUNC) != N && (i->function & FUNC) != NU && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS) { - if (i->function & FSTRING) + if (i->dest->type == ALPHA) memset (v->s, ' ', i->dest->width); else v->f = SYSMIS; @@ -981,7 +925,7 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) switch (i->function) { case SUM: - v->f = i->dbl[0]; + v->f = i->int1 ? i->dbl[0] : SYSMIS; break; case MEAN: v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS; @@ -1010,16 +954,14 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) else memset (v->s, ' ', i->dest->width); break; - case FGT | FSTRING: - case FLT | FSTRING: - case FIN | FSTRING: - case FOUT | FSTRING: - v->f = i->int2 ? (double) i->int1 / (double) i->int2 : SYSMIS; - break; case FGT: + case FGT | FSTRING: case FLT: + case FLT | FSTRING: case FIN: + case FIN | FSTRING: case FOUT: + case FOUT | FSTRING: v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS; break; case PGT: @@ -1033,9 +975,11 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS; break; case N: + case N | FSTRING: v->f = i->dbl[0]; break; case NU: + case NU | FSTRING: v->f = i->int1; break; case FIRST: @@ -1056,9 +1000,11 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) v->f = i->int1; break; case NMISS: + case NMISS | FSTRING: v->f = i->dbl[0]; break; case NUMISS: + case NUMISS | FSTRING: v->f = i->int1; break; default: @@ -1070,13 +1016,18 @@ dump_aggregate_info (struct agr_proc *agr, struct ccase *output) /* Resets the state for all the aggregate functions. */ static void -initialize_aggregate_info (struct agr_proc *agr) +initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input) { struct agr_var *iter; + case_destroy (&agr->break_case); + case_clone (&agr->break_case, input); + for (iter = agr->agr_vars; iter; iter = iter->next) { iter->missing = 0; + iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0; + iter->int1 = iter->int2 = 0; switch (iter->function) { case MIN: @@ -1097,10 +1048,8 @@ initialize_aggregate_info (struct agr_proc *agr) else moments1_clear (iter->moments); break; - default: - iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0; - iter->int1 = iter->int2 = 0; - break; + default: + break; } } }