X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Faggregate.c;h=4edbe22f773444b70ba341bb04c0b6f03e1ca578;hb=d84c9fab98875caf8fea1d870bf2a6bceeb36bdc;hp=42f330d5d21a64b141a963b82ce1bbcae49a7c3e;hpb=726b2e7b2b7a79f0cf860efe43e349762bf3f44e;p=pspp diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c index 42f330d5d2..4edbe22f77 100644 --- a/src/language/stats/aggregate.c +++ b/src/language/stats/aggregate.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2008, 2009, 2010, 2011, 2012, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,120 +16,89 @@ #include +#include "language/stats/aggregate.h" + #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "xalloc.h" +#include "data/any-writer.h" +#include "data/case.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/file-handle-def.h" +#include "data/format.h" +#include "data/settings.h" +#include "data/subcase.h" +#include "data/sys-file-writer.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/data-io/file-handle.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" +#include "language/stats/sort-criteria.h" +#include "libpspp/assertion.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" +#include "math/moments.h" +#include "math/percentiles.h" +#include "math/sort.h" +#include "math/statistic.h" + +#include "gl/c-strcase.h" +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +/* Argument for AGGREGATE function. -/* Argument for AGGREGATE function. */ -union agr_argument + Only one of the members is used, so this could be a union, but it's simpler + to just have both. */ +struct agr_argument { double f; /* Numeric. */ - char *c; /* Short or long string. */ + struct substring s; /* String. */ }; /* Specifies how to make an aggregate variable. */ struct agr_var { - struct agr_var *next; /* Next in list. */ - /* Collected during parsing. */ const struct variable *src; /* Source variable. */ struct variable *dest; /* Target variable. */ - int function; /* Function. */ + enum agr_function function; /* Function. */ enum mv_class exclude; /* Classes of missing values to exclude. */ - union agr_argument arg[2]; /* Arguments. */ + struct agr_argument arg[2]; /* Arguments. */ /* Accumulated during AGGREGATE execution. */ - double dbl[3]; - int int1, int2; + double dbl; + double W; /* Total non-missing weight. */ + int int1; char *string; bool saw_missing; struct moments1 *moments; - double cc; struct variable *subject; struct variable *weight; struct casewriter *writer; }; -/* Aggregation functions. */ -enum - { - NONE, SUM, MEAN, MEDIAN, SD, MAX, MIN, PGT, PLT, PIN, POUT, FGT, FLT, FIN, - FOUT, N, NU, NMISS, NUMISS, FIRST, LAST, - N_AGR_FUNCS, N_NO_VARS, NU_NO_VARS, - FUNC = 0x1f, /* Function mask. */ - FSTRING = 1<<5, /* String function bit. */ - }; - -/* Attributes of an aggregation function. */ -struct agr_func - { - const char *name; /* Aggregation function name. */ - size_t n_args; /* Number of arguments. */ - enum val_type alpha_type; /* When given ALPHA arguments, output type. */ - struct fmt_spec format; /* Format spec if alpha_type != ALPHA. */ - }; - /* Attributes of aggregation functions. */ -static const struct agr_func agr_func_tab[] = +const struct agr_func agr_func_tab[] = { - {"", 0, -1, {0, 0, 0}}, - {"SUM", 0, -1, {FMT_F, 8, 2}}, - {"MEAN", 0, -1, {FMT_F, 8, 2}}, - {"MEDIAN", 0, -1, {FMT_F, 8, 2}}, - {"SD", 0, -1, {FMT_F, 8, 2}}, - {"MAX", 0, VAL_STRING, {-1, -1, -1}}, - {"MIN", 0, VAL_STRING, {-1, -1, -1}}, - {"PGT", 1, VAL_NUMERIC, {FMT_F, 5, 1}}, - {"PLT", 1, VAL_NUMERIC, {FMT_F, 5, 1}}, - {"PIN", 2, VAL_NUMERIC, {FMT_F, 5, 1}}, - {"POUT", 2, VAL_NUMERIC, {FMT_F, 5, 1}}, - {"FGT", 1, VAL_NUMERIC, {FMT_F, 5, 3}}, - {"FLT", 1, VAL_NUMERIC, {FMT_F, 5, 3}}, - {"FIN", 2, VAL_NUMERIC, {FMT_F, 5, 3}}, - {"FOUT", 2, VAL_NUMERIC, {FMT_F, 5, 3}}, - {"N", 0, VAL_NUMERIC, {FMT_F, 7, 0}}, - {"NU", 0, VAL_NUMERIC, {FMT_F, 7, 0}}, - {"NMISS", 0, VAL_NUMERIC, {FMT_F, 7, 0}}, - {"NUMISS", 0, VAL_NUMERIC, {FMT_F, 7, 0}}, - {"FIRST", 0, VAL_STRING, {-1, -1, -1}}, - {"LAST", 0, VAL_STRING, {-1, -1, -1}}, - {NULL, 0, -1, {-1, -1, -1}}, - {"N", 0, VAL_NUMERIC, {FMT_F, 7, 0}}, - {"NU", 0, VAL_NUMERIC, {FMT_F, 7, 0}}, +#define AGRF(ENUM, NAME, DESCRIPTION, SRC_VARS, N_ARGS, ALPHA_TYPE, W, D) \ + [ENUM] = { NAME, DESCRIPTION, SRC_VARS, N_ARGS, ALPHA_TYPE, \ + { .type = (W) > 0 ? FMT_F : -1, .w = W, .d = D } }, +AGGREGATE_FUNCTIONS +#undef AGRF + {NULL, NULL, AGR_SV_NO, 0, -1, {-1, -1, -1}}, }; /* Missing value types. */ @@ -145,18 +114,20 @@ struct agr_proc /* Break variables. */ struct subcase sort; /* Sort criteria (break variables). */ const struct variable **break_vars; /* Break variables. */ - size_t break_var_cnt; /* Number of break variables. */ - struct ccase *break_case; /* Last values of break variables. */ + size_t break_n_vars; /* Number of break variables. */ enum missing_treatment missing; /* How to treat missing values. */ - struct agr_var *agr_vars; /* First aggregate variable. */ + struct agr_var *agr_vars; /* Aggregate variables. */ + size_t n_agr_vars; struct dictionary *dict; /* Aggregate dictionary. */ const struct dictionary *src_dict; /* Dict of the source */ - int case_cnt; /* Counts aggregated cases. */ + int n_cases; /* Counts aggregated cases. */ + + bool add_variables; /* True iff the aggregated variables should + be appended to the existing dictionary */ }; -static void initialize_aggregate_info (struct agr_proc *, - const struct ccase *); +static void initialize_aggregate_info (struct agr_proc *); static void accumulate_aggregate_info (struct agr_proc *, const struct ccase *); @@ -164,8 +135,9 @@ static void accumulate_aggregate_info (struct agr_proc *, static bool parse_aggregate_functions (struct lexer *, const struct dictionary *, struct agr_proc *); static void agr_destroy (struct agr_proc *); -static void dump_aggregate_info (struct agr_proc *agr, - struct casewriter *output); +static void dump_aggregate_info (const struct agr_proc *agr, + struct casewriter *output, + const struct ccase *break_case); /* Parsing. */ @@ -174,87 +146,121 @@ int cmd_aggregate (struct lexer *lexer, struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); - struct agr_proc agr; + struct agr_proc agr = { + .missing = ITEMWISE, + .src_dict = dict, + }; struct file_handle *out_file = NULL; - struct casereader *input = NULL, *group; - struct casegrouper *grouper; + struct casereader *input = NULL; struct casewriter *output = NULL; bool copy_documents = false; bool presorted = false; - bool saw_direction; - bool ok; - - memset(&agr, 0 , sizeof (agr)); - agr.missing = ITEMWISE; - agr.break_case = NULL; - - agr.dict = dict_create (); - agr.src_dict = dict; - subcase_init_empty (&agr.sort); - dict_set_label (agr.dict, dict_get_label (dict)); - dict_set_documents (agr.dict, dict_get_documents (dict)); + int addvariables_ofs = 0; /* OUTFILE subcommand must be first. */ - lex_match (lexer, '/'); - if (!lex_force_match_id (lexer, "OUTFILE")) - goto error; - lex_match (lexer, '='); - if (!lex_match (lexer, '*')) + if (lex_match_phrase (lexer, "/OUTFILE") || lex_match_id (lexer, "OUTFILE")) { - out_file = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH); - if (out_file == NULL) - goto error; + lex_match (lexer, T_EQUALS); + if (!lex_match (lexer, T_ASTERISK)) + { + out_file = fh_parse (lexer, FH_REF_FILE, dataset_session (ds)); + if (out_file == NULL) + goto error; + } + + if (!out_file && lex_match_id (lexer, "MODE")) + { + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "ADDVARIABLES")) + { + addvariables_ofs = lex_ofs (lexer) - 1; + agr.add_variables = true; + presorted = true; + } + else if (lex_match_id (lexer, "REPLACE")) + agr.add_variables = false; + else + { + lex_error_expecting (lexer, "ADDVARIABLES", "REPLACE"); + goto error; + } + } + } + else + { + agr.add_variables = true; + presorted = true; } - /* Read most of the subcommands. */ - for (;;) + if (lex_match_phrase (lexer, "/MISSING")) { - lex_match (lexer, '/'); + lex_match (lexer, T_EQUALS); + if (!lex_match_id (lexer, "COLUMNWISE")) + { + lex_error_expecting (lexer, "COLUMNWISE"); + goto error; + } + agr.missing = COLUMNWISE; + } - if (lex_match_id (lexer, "MISSING")) - { - lex_match (lexer, '='); - if (!lex_match_id (lexer, "COLUMNWISE")) - { - lex_error (lexer, _("while expecting COLUMNWISE")); - goto error; - } - agr.missing = COLUMNWISE; - } - else if (lex_match_id (lexer, "DOCUMENT")) - copy_documents = true; - else if (lex_match_id (lexer, "PRESORTED")) + int presorted_ofs = 0; + for (;;) + if (lex_match_phrase (lexer, "/DOCUMENT")) + copy_documents = true; + else if (lex_match_phrase (lexer, "/PRESORTED")) + { presorted = true; - else if (lex_match_id (lexer, "BREAK")) - { - int i; + presorted_ofs = lex_ofs (lexer) - 1; + } + else + break; - lex_match (lexer, '='); - if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars, - &saw_direction)) - goto error; - agr.break_var_cnt = subcase_get_n_fields (&agr.sort); + if (agr.add_variables) + agr.dict = dict_clone (dict); + else + agr.dict = dict_create (dict_get_encoding (dict)); - for (i = 0; i < agr.break_var_cnt; i++) - dict_clone_var_assert (agr.dict, agr.break_vars[i]); + dict_set_label (agr.dict, dict_get_label (dict)); + dict_set_documents (agr.dict, dict_get_documents (dict)); - /* BREAK must follow the options. */ - break; - } - else + if (lex_match_phrase (lexer, "/BREAK")) + { + lex_match (lexer, T_EQUALS); + bool saw_direction; + int break_start = lex_ofs (lexer); + if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars, + &saw_direction)) + goto error; + int break_end = lex_ofs (lexer) - 1; + agr.break_n_vars = subcase_get_n_fields (&agr.sort); + + if (! agr.add_variables) + for (size_t i = 0; i < agr.break_n_vars; i++) + dict_clone_var_assert (agr.dict, agr.break_vars[i]); + + if (presorted && saw_direction) { - lex_error (lexer, _("expecting BREAK")); - goto error; + lex_ofs_msg (lexer, SW, break_start, break_end, + _("When the input data is presorted, specifying " + "sorting directions with (A) or (D) has no effect. " + "Output data will be sorted the same way as the " + "input data.")); + if (presorted_ofs) + lex_ofs_msg (lexer, SN, presorted_ofs, presorted_ofs, + _("The PRESORTED subcommand state that the " + "input data is presorted.")); + else if (addvariables_ofs) + lex_ofs_msg (lexer, SN, addvariables_ofs, addvariables_ofs, + _("ADDVARIABLES implies that the input data " + "is presorted.")); + else + msg (SN, _("The input data must be presorted because the " + "OUTFILE subcommand is not specified.")); } } - if (presorted && saw_direction) - msg (SW, _("When PRESORTED is specified, specifying sorting directions " - "with (A) or (D) has no effect. Output data will be sorted " - "the same way as the input data.")); /* Read in the aggregate functions. */ - lex_match (lexer, '/'); if (!parse_aggregate_functions (lexer, dict, &agr)) goto error; @@ -263,14 +269,14 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) dict_clear_documents (agr.dict); /* Cancel SPLIT FILE. */ - dict_set_split_vars (agr.dict, NULL, 0); + dict_clear_split_vars (agr.dict); /* Initialize. */ - agr.case_cnt = 0; + agr.n_cases = 0; if (out_file == NULL) { - /* The active file will be replaced by the aggregated data, + /* The active dataset will be replaced by the aggregated data, so TEMPORARY is moot. */ proc_cancel_temporary_transformations (ds); proc_discard_output (ds); @@ -290,33 +296,55 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) subcase_clear (&agr.sort); } + struct casegrouper *grouper; + struct casereader *group; for (grouper = casegrouper_create_vars (input, agr.break_vars, - agr.break_var_cnt); + agr.break_n_vars); casegrouper_get_next_group (grouper, &group); casereader_destroy (group)) { + struct casereader *placeholder = NULL; struct ccase *c = casereader_peek (group, 0); + if (c == NULL) { casereader_destroy (group); continue; } - initialize_aggregate_info (&agr, c); - case_unref (c); - for (; (c = casereader_read (group)) != NULL; case_unref (c)) - accumulate_aggregate_info (&agr, c); - dump_aggregate_info (&agr, output); + initialize_aggregate_info (&agr); + + if (agr.add_variables) + placeholder = casereader_clone (group); + + { + struct ccase *cg; + for (; (cg = casereader_read (group)) != NULL; case_unref (cg)) + accumulate_aggregate_info (&agr, cg); + } + + + if (agr.add_variables) + { + struct ccase *cg; + for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg)) + dump_aggregate_info (&agr, output, cg); + + casereader_destroy (placeholder); + } + else + { + dump_aggregate_info (&agr, output, c); + } + case_unref (c); } if (!casegrouper_destroy (grouper)) goto error; - if (!proc_commit (ds)) - { - input = NULL; - goto error; - } + bool ok = proc_commit (ds); input = NULL; + if (!ok) + goto error; if (out_file == NULL) { @@ -324,7 +352,8 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) if (next_input == NULL) goto error; - proc_set_active_file (ds, next_input, agr.dict); + dataset_set_dict (ds, agr.dict); + dataset_set_source (ds, next_input); agr.dict = NULL; } else @@ -348,175 +377,165 @@ error: return CMD_CASCADING_FAILURE; } +static bool +parse_agr_func_name (struct lexer *lexer, int *func_index, + enum mv_class *exclude) +{ + if (lex_token (lexer) != T_ID) + { + lex_error (lexer, _("Syntax error expecting aggregation function.")); + return false; + } + + struct substring name = lex_tokss (lexer); + *exclude = ss_chomp_byte (&name, '.') ? MV_SYSTEM : MV_ANY; + + for (const struct agr_func *f = agr_func_tab; f->name; f++) + if (ss_equals_case (ss_cstr (f->name), name)) + { + *func_index = f - agr_func_tab; + lex_get (lexer); + return true; + } + lex_error (lexer, _("Unknown aggregation function %s."), lex_tokcstr (lexer)); + return false; +} + /* Parse all the aggregate functions. */ static bool parse_aggregate_functions (struct lexer *lexer, const struct dictionary *dict, struct agr_proc *agr) { - struct agr_var *tail; /* Tail of linked list starting at agr->vars. */ + if (!lex_force_match (lexer, T_SLASH)) + return false; + + size_t starting_n_vars = dict_get_n_vars (dict); + size_t allocated_agr_vars = 0; /* Parse everything. */ - tail = NULL; for (;;) { - char **dest; - char **dest_label; - size_t n_dest; - struct string function_name; + char **dest = NULL; + char **dest_label = NULL; + size_t n_vars = 0; - enum mv_class exclude; - const struct agr_func *function; - int func_index; + struct agr_argument arg[2] = { { .f = 0 }, { .f = 0 } }; - union agr_argument arg[2]; - - const struct variable **src; - size_t n_src; - - size_t i; - - dest = NULL; - dest_label = NULL; - n_dest = 0; - src = NULL; - function = NULL; - n_src = 0; - arg[0].c = NULL; - arg[1].c = NULL; - ds_init_empty (&function_name); + const struct variable **src = NULL; /* Parse the list of target variables. */ - while (!lex_match (lexer, '=')) + int dst_start_ofs = lex_ofs (lexer); + while (!lex_match (lexer, T_EQUALS)) { - size_t n_dest_prev = n_dest; + size_t n_vars_prev = n_vars; - if (!parse_DATA_LIST_vars (lexer, &dest, &n_dest, + if (!parse_DATA_LIST_vars (lexer, dict, &dest, &n_vars, (PV_APPEND | PV_SINGLE | PV_NO_SCRATCH | PV_NO_DUPLICATE))) goto error; /* Assign empty labels. */ - { - int j; + dest_label = xnrealloc (dest_label, n_vars, sizeof *dest_label); + for (size_t j = n_vars_prev; j < n_vars; j++) + dest_label[j] = NULL; - dest_label = xnrealloc (dest_label, n_dest, sizeof *dest_label); - for (j = n_dest_prev; j < n_dest; j++) - dest_label[j] = NULL; - } - - - - if (lex_token (lexer) == T_STRING) + if (lex_is_string (lexer)) { - struct string label; - ds_init_string (&label, lex_tokstr (lexer)); - - ds_truncate (&label, 255); - dest_label[n_dest - 1] = ds_xstrdup (&label); + dest_label[n_vars - 1] = xstrdup (lex_tokcstr (lexer)); lex_get (lexer); - ds_destroy (&label); } } + int dst_end_ofs = lex_ofs (lexer) - 2; /* Get the name of the aggregation function. */ - if (lex_token (lexer) != T_ID) - { - lex_error (lexer, _("expecting aggregation function")); - goto error; - } - - exclude = MV_ANY; - - ds_assign_string (&function_name, lex_tokstr (lexer)); - - ds_chomp (&function_name, '.'); - - if (lex_tokid(lexer)[strlen (lex_tokid (lexer)) - 1] == '.') - exclude = MV_SYSTEM; - - for (function = agr_func_tab; function->name; function++) - if (!strcasecmp (function->name, ds_cstr (&function_name))) - break; - if (NULL == function->name) - { - msg (SE, _("Unknown aggregation function %s."), - ds_cstr (&function_name)); - goto error; - } - ds_destroy (&function_name); - func_index = function - agr_func_tab; - lex_get (lexer); + int func_index; + enum mv_class exclude; + if (!parse_agr_func_name (lexer, &func_index, &exclude)) + goto error; + const struct agr_func *function = &agr_func_tab[func_index]; /* Check for leading lparen. */ - if (!lex_match (lexer, '(')) + if (!lex_match (lexer, T_LPAREN)) { - if (func_index == N) - func_index = N_NO_VARS; - else if (func_index == NU) - func_index = NU_NO_VARS; - else + if (function->src_vars == AGR_SV_YES) { - lex_error (lexer, _("expecting `('")); + bool ok UNUSED = lex_force_match (lexer, T_LPAREN); goto error; } } else { /* Parse list of source variables. */ - { - int pv_opts = PV_NO_SCRATCH; - - if (func_index == SUM || func_index == MEAN || func_index == SD) - pv_opts |= PV_NUMERIC; - else if (function->n_args) - pv_opts |= PV_SAME_TYPE; - - if (!parse_variables_const (lexer, dict, &src, &n_src, pv_opts)) - goto error; - } + int pv_opts = PV_NO_SCRATCH; + if (func_index == AGRF_SUM || func_index == AGRF_MEAN + || func_index == AGRF_MEDIAN || func_index == AGRF_SD) + pv_opts |= PV_NUMERIC; + else if (function->n_args) + pv_opts |= PV_SAME_TYPE; + + int src_start_ofs = lex_ofs (lexer); + size_t n_src; + if (!parse_variables_const (lexer, dict, &src, &n_src, pv_opts)) + goto error; + int src_end_ofs = lex_ofs (lexer) - 1; /* Parse function arguments, for those functions that require arguments. */ + int args_start_ofs = 0; if (function->n_args != 0) - for (i = 0; i < function->n_args; i++) + for (size_t i = 0; i < function->n_args; i++) { - int type; + lex_match (lexer, T_COMMA); - lex_match (lexer, ','); - if (lex_token (lexer) == T_STRING) - { - arg[i].c = ds_xstrdup (lex_tokstr (lexer)); - type = VAL_STRING; - } - else if (lex_is_number (lexer)) - { - arg[i].f = lex_tokval (lexer); - type = VAL_NUMERIC; - } + enum val_type type; + if (lex_is_string (lexer)) + type = VAL_STRING; + else if (lex_is_number (lexer)) + type = VAL_NUMERIC; else { - msg (SE, _("Missing argument %zu to %s."), - i + 1, function->name); + lex_error (lexer, _("Missing argument %zu to %s."), + i + 1, function->name); goto error; } - lex_get (lexer); - if (type != var_get_type (src[0])) { msg (SE, _("Arguments to %s must be of same type as " "source variables."), function->name); + if (type == VAL_NUMERIC) + { + lex_next_msg (lexer, SN, 0, 0, + _("The argument is numeric.")); + lex_ofs_msg (lexer, SN, src_start_ofs, src_end_ofs, + _("The variables have string type.")); + } + else + { + lex_next_msg (lexer, SN, 0, 0, + _("The argument is a string.")); + lex_ofs_msg (lexer, SN, src_start_ofs, src_end_ofs, + _("The variables are numeric.")); + } goto error; } + + if (i == 0) + args_start_ofs = lex_ofs (lexer); + if (type == VAL_NUMERIC) + arg[i].f = lex_tokval (lexer); + else + arg[i].s = recode_substring_pool (dict_get_encoding (agr->dict), + "UTF-8", lex_tokss (lexer), + NULL); + lex_get (lexer); } + int args_end_ofs = lex_ofs (lexer) - 1; /* Trailing rparen. */ - if (!lex_match (lexer, ')')) - { - lex_error (lexer, _("expecting `)'")); - goto error; - } + if (!lex_force_match (lexer, T_RPAREN)) + goto error; /* Now check that the number of source variables match the number of target variables. If we check earlier @@ -524,162 +543,133 @@ parse_aggregate_functions (struct lexer *lexer, const struct dictionary *dict, message, i.e. `AGGREGATE x=SUM(y t).' will get this error message when a proper message would be more like `unknown variable t'. */ - if (n_src != n_dest) + if (n_src != n_vars) { msg (SE, _("Number of source variables (%zu) does not match " "number of target variables (%zu)."), - n_src, n_dest); + n_src, n_vars); + lex_ofs_msg (lexer, SN, src_start_ofs, src_end_ofs, + _("These are the source variables.")); + lex_ofs_msg (lexer, SN, dst_start_ofs, dst_end_ofs, + _("These are the target variables.")); goto error; } - if ((func_index == PIN || func_index == POUT - || func_index == FIN || func_index == FOUT) + if ((func_index == AGRF_PIN || func_index == AGRF_POUT + || func_index == AGRF_FIN || func_index == AGRF_FOUT) && (var_is_numeric (src[0]) ? arg[0].f > arg[1].f - : str_compare_rpad (arg[0].c, arg[1].c) > 0)) + : buf_compare_rpad (arg[0].s.string, arg[0].s.length, + arg[1].s.string, arg[1].s.length) > 0)) { - union agr_argument t = arg[0]; + struct agr_argument tmp = arg[0]; arg[0] = arg[1]; - arg[1] = t; + arg[1] = tmp; - msg (SW, _("The value arguments passed to the %s function " - "are out-of-order. They will be treated as if " - "they had been specified in the correct order."), - function->name); + lex_ofs_msg (lexer, SW, args_start_ofs, args_end_ofs, + _("The value arguments passed to the %s function " + "are out of order. They will be treated as if " + "they had been specified in the correct order."), + function->name); } } - /* Finally add these to the linked list of aggregation - variables. */ - for (i = 0; i < n_dest; i++) + /* Finally add these to the aggregation variables. */ + for (size_t i = 0; i < n_vars; i++) { - struct agr_var *v = xzalloc (sizeof *v); - - /* Add variable to chain. */ - if (agr->agr_vars != NULL) - tail->next = v; - else - agr->agr_vars = v; - tail = v; - tail->next = NULL; - v->moments = NULL; - - /* Create the target variable in the aggregate - dictionary. */ - { - struct variable *destvar; - - v->function = func_index; - - if (src) - { - v->src = src[i]; - - if (var_is_alpha (src[i])) - { - v->function |= FSTRING; - v->string = xmalloc (var_get_width (src[i])); - } - - if (function->alpha_type == VAL_STRING) - destvar = dict_clone_var_as (agr->dict, v->src, dest[i]); - else - { - assert (var_is_numeric (v->src) - || function->alpha_type == VAL_NUMERIC); - destvar = dict_create_var (agr->dict, dest[i], 0); - if (destvar != NULL) - { - struct fmt_spec f; - if ((func_index == N || func_index == NMISS) - && dict_get_weight (dict) != NULL) - f = fmt_for_output (FMT_F, 8, 2); - else - f = function->format; - var_set_both_formats (destvar, &f); - } - } - } else { - struct fmt_spec f; - v->src = NULL; - destvar = dict_create_var (agr->dict, dest[i], 0); - if (func_index == N_NO_VARS && dict_get_weight (dict) != NULL) - f = fmt_for_output (FMT_F, 8, 2); - else - f = function->format; - var_set_both_formats (destvar, &f); - } - - if (!destvar) - { - msg (SE, _("Variable name %s is not unique within the " - "aggregate file dictionary, which contains " - "the aggregate variables and the break " - "variables."), - dest[i]); - goto error; - } - - free (dest[i]); - if (dest_label[i]) - var_set_label (destvar, dest_label[i]); + const struct variable *existing_var = dict_lookup_var (agr->dict, + dest[i]); + if (existing_var) + { + if (var_get_dict_index (existing_var) >= starting_n_vars) + lex_ofs_error (lexer, dst_start_ofs, dst_end_ofs, + _("Duplicate target variable name %s."), + dest[i]); + else if (agr->add_variables) + lex_ofs_error (lexer, dst_start_ofs, dst_end_ofs, + _("Variable name %s duplicates the name of a " + "variable in the active file dictionary."), + dest[i]); + else + lex_ofs_error (lexer, dst_start_ofs, dst_end_ofs, + _("Variable name %s duplicates the name of a " + "break variable."), dest[i]); + goto error; + } - v->dest = destvar; - } + /* Add variable. */ + if (agr->n_agr_vars >= allocated_agr_vars) + agr->agr_vars = x2nrealloc (agr->agr_vars, &allocated_agr_vars, + sizeof *agr->agr_vars); + struct agr_var *v = &agr->agr_vars[agr->n_agr_vars++]; + *v = (struct agr_var) { + .exclude = exclude, + .moments = NULL, + .function = func_index, + .src = src ? src[i] : NULL, + }; + + /* Create the target variable in the aggregate dictionary. */ + if (v->src && var_is_alpha (v->src)) + v->string = xmalloc (var_get_width (v->src)); + + if (v->src && function->alpha_type == VAL_STRING) + v->dest = dict_clone_var_as_assert (agr->dict, v->src, dest[i]); + else + { + v->dest = dict_create_var_assert (agr->dict, dest[i], 0); - v->exclude = exclude; + struct fmt_spec f; + if ((func_index == AGRF_N || func_index == AGRF_NMISS) + && dict_get_weight (dict) != NULL) + f = fmt_for_output (FMT_F, 8, 2); + else + f = function->format; + var_set_both_formats (v->dest, &f); + } + if (dest_label[i]) + var_set_label (v->dest, dest_label[i]); if (v->src != NULL) - { - int j; - - if (var_is_numeric (v->src)) - for (j = 0; j < function->n_args; j++) - v->arg[j].f = arg[j].f; - else - for (j = 0; j < function->n_args; j++) - v->arg[j].c = xstrdup (arg[j].c); - } + for (size_t j = 0; j < function->n_args; j++) + v->arg[j] = (struct agr_argument) { + .f = arg[j].f, + .s = arg[j].s.string ? ss_clone (arg[j].s) : ss_empty (), + }; } - if (src != NULL && var_is_alpha (src[0])) - for (i = 0; i < function->n_args; i++) - { - free (arg[i].c); - arg[i].c = NULL; - } + ss_dealloc (&arg[0].s); + ss_dealloc (&arg[1].s); free (src); + for (size_t i = 0; i < n_vars; i++) + { + free (dest[i]); + free (dest_label[i]); + } free (dest); free (dest_label); - if (!lex_match (lexer, '/')) + if (!lex_match (lexer, T_SLASH)) { - if (lex_token (lexer) == '.') + if (lex_token (lexer) == T_ENDCMD) return true; - lex_error (lexer, "expecting end of command"); + lex_error (lexer, "Syntax error expecting end of command."); return false; } continue; error: - ds_destroy (&function_name); - for (i = 0; i < n_dest; i++) + for (size_t i = 0; i < n_vars; i++) { free (dest[i]); free (dest_label[i]); } free (dest); free (dest_label); - free (arg[0].c); - free (arg[1].c); - if (src && n_src && var_is_alpha (src[0])) - for (i = 0; i < function->n_args; i++) - { - free (arg[i].c); - arg[i].c = NULL; - } + ss_dealloc (&arg[0].s); + ss_dealloc (&arg[1].s); free (src); return false; @@ -690,35 +680,25 @@ parse_aggregate_functions (struct lexer *lexer, const struct dictionary *dict, static void agr_destroy (struct agr_proc *agr) { - struct agr_var *iter, *next; - - subcase_destroy (&agr->sort); + subcase_uninit (&agr->sort); free (agr->break_vars); - case_unref (agr->break_case); - for (iter = agr->agr_vars; iter; iter = next) + for (size_t i = 0; i < agr->n_agr_vars; i++) { - next = iter->next; + struct agr_var *av = &agr->agr_vars[i]; - if (iter->function & FSTRING) - { - size_t n_args; - size_t i; - - n_args = agr_func_tab[iter->function & FUNC].n_args; - for (i = 0; i < n_args; i++) - free (iter->arg[i].c); - free (iter->string); - } - else if (iter->function == SD) - moments1_destroy (iter->moments); + ss_dealloc (&av->arg[0].s); + ss_dealloc (&av->arg[1].s); + free (av->string); - dict_destroy_internal_var (iter->subject); - dict_destroy_internal_var (iter->weight); + if (av->function == AGRF_SD) + moments1_destroy (av->moments); - free (iter); + dict_destroy_internal_var (av->subject); + dict_destroy_internal_var (av->weight); } + free (agr->agr_vars); if (agr->dict != NULL) - dict_destroy (agr->dict); + dict_unref (agr->dict); } /* Execution. */ @@ -727,405 +707,426 @@ agr_destroy (struct agr_proc *agr) static void accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) { - struct agr_var *iter; - double weight; bool bad_warn = true; + double weight = dict_get_case_weight (agr->src_dict, input, &bad_warn); + for (size_t i = 0; i < agr->n_agr_vars; i++) + { + struct agr_var *av = &agr->agr_vars[i]; + if (av->src) + { + bool is_string = var_is_alpha (av->src); + const union value *v = case_data (input, av->src); + int src_width = var_get_width (av->src); + const struct substring vs = (src_width > 0 + ? value_ss (v, src_width) + : ss_empty ()); + + if (var_is_value_missing (av->src, v) & av->exclude) + { + switch (av->function) + { + case AGRF_NMISS: + av->dbl += weight; + break; + + case AGRF_NUMISS: + av->int1++; + break; + + case AGRF_SUM: + case AGRF_MEAN: + case AGRF_MEDIAN: + case AGRF_SD: + case AGRF_MAX: + case AGRF_MIN: + case AGRF_PGT: + case AGRF_PLT: + case AGRF_PIN: + case AGRF_POUT: + case AGRF_FGT: + case AGRF_FLT: + case AGRF_FIN: + case AGRF_FOUT: + case AGRF_CGT: + case AGRF_CLT: + case AGRF_CIN: + case AGRF_COUT: + case AGRF_N: + case AGRF_NU: + case AGRF_FIRST: + case AGRF_LAST: + break; + } + av->saw_missing = true; + continue; + } - weight = dict_get_case_weight (agr->src_dict, input, &bad_warn); - - for (iter = agr->agr_vars; iter; iter = iter->next) - if (iter->src) - { - const union value *v = case_data (input, iter->src); - int src_width = var_get_width (iter->src); - - if (var_is_value_missing (iter->src, v, iter->exclude)) - { - switch (iter->function) - { - case NMISS: - case NMISS | FSTRING: - iter->dbl[0] += weight; - break; - case NUMISS: - case NUMISS | FSTRING: - iter->int1++; - break; - } - iter->saw_missing = true; - continue; - } - - /* This is horrible. There are too many possibilities. */ - switch (iter->function) - { - case SUM: - iter->dbl[0] += v->f * weight; - iter->int1 = 1; - break; - case MEAN: - iter->dbl[0] += v->f * weight; - iter->dbl[1] += weight; - break; - case MEDIAN: - { - double wv ; - struct ccase *cout; - - cout = case_create (casewriter_get_proto (iter->writer)); - - case_data_rw (cout, iter->subject)->f - = case_data (input, iter->src)->f; - - wv = dict_get_case_weight (agr->src_dict, input, NULL); - - case_data_rw (cout, iter->weight)->f = wv; - - iter->cc += wv; - - casewriter_write (iter->writer, cout); - } - break; - case SD: - moments1_add (iter->moments, v->f, weight); - break; - case MAX: - iter->dbl[0] = MAX (iter->dbl[0], v->f); - iter->int1 = 1; - break; - case MAX | FSTRING: - if (memcmp (iter->string, value_str (v, src_width), src_width) < 0) - memcpy (iter->string, value_str (v, src_width), src_width); - iter->int1 = 1; - break; - case MIN: - iter->dbl[0] = MIN (iter->dbl[0], v->f); - iter->int1 = 1; - break; - case MIN | FSTRING: - if (memcmp (iter->string, value_str (v, src_width), src_width) > 0) - memcpy (iter->string, value_str (v, src_width), src_width); - iter->int1 = 1; - break; - case FGT: - case PGT: - if (v->f > iter->arg[0].f) - iter->dbl[0] += weight; - iter->dbl[1] += weight; - break; - case FGT | FSTRING: - case PGT | FSTRING: - if (memcmp (iter->arg[0].c, - value_str (v, src_width), src_width) < 0) - iter->dbl[0] += weight; - iter->dbl[1] += weight; - break; - case FLT: - case PLT: - if (v->f < iter->arg[0].f) - iter->dbl[0] += weight; - iter->dbl[1] += weight; - break; - case FLT | FSTRING: - case PLT | FSTRING: - if (memcmp (iter->arg[0].c, - value_str (v, src_width), src_width) > 0) - iter->dbl[0] += weight; - iter->dbl[1] += weight; - break; - case FIN: - case PIN: - if (iter->arg[0].f <= v->f && v->f <= iter->arg[1].f) - iter->dbl[0] += weight; - iter->dbl[1] += weight; - break; - case FIN | FSTRING: - case PIN | FSTRING: - if (memcmp (iter->arg[0].c, - value_str (v, src_width), src_width) <= 0 - && memcmp (iter->arg[1].c, - value_str (v, src_width), src_width) >= 0) - iter->dbl[0] += weight; - iter->dbl[1] += weight; - break; - case FOUT: - case POUT: - if (iter->arg[0].f > v->f || v->f > iter->arg[1].f) - iter->dbl[0] += weight; - iter->dbl[1] += weight; - break; - case FOUT | FSTRING: - case POUT | FSTRING: - if (memcmp (iter->arg[0].c, - value_str (v, src_width), src_width) > 0 - || memcmp (iter->arg[1].c, - value_str (v, src_width), src_width) < 0) - iter->dbl[0] += weight; - iter->dbl[1] += weight; - break; - case N: - case N | FSTRING: - iter->dbl[0] += weight; - break; - case NU: - case NU | FSTRING: - iter->int1++; - break; - case FIRST: - if (iter->int1 == 0) - { - iter->dbl[0] = v->f; - iter->int1 = 1; - } - break; - case FIRST | FSTRING: - if (iter->int1 == 0) - { - memcpy (iter->string, value_str (v, src_width), src_width); - iter->int1 = 1; - } - break; - case LAST: - iter->dbl[0] = v->f; - iter->int1 = 1; - break; - case LAST | FSTRING: - memcpy (iter->string, value_str (v, src_width), src_width); - iter->int1 = 1; - break; - case NMISS: - case NMISS | FSTRING: - case NUMISS: - case NUMISS | FSTRING: - /* Our value is not missing or it would have been - caught earlier. Nothing to do. */ - break; - default: - NOT_REACHED (); - } - } else { - switch (iter->function) - { - case N_NO_VARS: - iter->dbl[0] += weight; - break; - case NU_NO_VARS: - iter->int1++; - break; - default: - NOT_REACHED (); - } + /* This is horrible. There are too many possibilities. */ + av->W += weight; + switch (av->function) + { + case AGRF_SUM: + av->dbl += v->f * weight; + av->int1 = 1; + break; + + case AGRF_MEAN: + av->dbl += v->f * weight; + break; + + case AGRF_MEDIAN: + { + struct ccase *cout = case_create (casewriter_get_proto (av->writer)); + *case_num_rw (cout, av->subject) = case_num (input, av->src); + *case_num_rw (cout, av->weight) = weight; + casewriter_write (av->writer, cout); + } + break; + + case AGRF_SD: + moments1_add (av->moments, v->f, weight); + break; + + case AGRF_MAX: + if (!is_string) + av->dbl = MAX (av->dbl, v->f); + else if (memcmp (av->string, v->s, src_width) < 0) + memcpy (av->string, v->s, src_width); + av->int1 = 1; + break; + + case AGRF_MIN: + if (!is_string) + av->dbl = MIN (av->dbl, v->f); + else if (memcmp (av->string, v->s, src_width) > 0) + memcpy (av->string, v->s, src_width); + av->dbl = MIN (av->dbl, v->f); + av->int1 = 1; + break; + + case AGRF_FGT: + case AGRF_PGT: + case AGRF_CGT: + if (is_string + ? ss_compare_rpad (av->arg[0].s, vs) < 0 + : v->f > av->arg[0].f) + av->dbl += weight; + break; + + case AGRF_FLT: + case AGRF_PLT: + case AGRF_CLT: + if (is_string + ? ss_compare_rpad (av->arg[0].s, vs) > 0 + : v->f < av->arg[0].f) + av->dbl += weight; + break; + + case AGRF_FIN: + case AGRF_PIN: + case AGRF_CIN: + if (is_string + ? (ss_compare_rpad (av->arg[0].s, vs) <= 0 + && ss_compare_rpad (av->arg[1].s, vs) >= 0) + : av->arg[0].f <= v->f && v->f <= av->arg[1].f) + av->dbl += weight; + break; + + case AGRF_FOUT: + case AGRF_POUT: + case AGRF_COUT: + if (is_string + ? (ss_compare_rpad (av->arg[0].s, vs) > 0 + || ss_compare_rpad (av->arg[1].s, vs) < 0) + : av->arg[0].f > v->f || v->f > av->arg[1].f) + av->dbl += weight; + break; + + case AGRF_N: + av->dbl += weight; + break; + + case AGRF_NU: + av->int1++; + break; + + case AGRF_FIRST: + if (av->int1 == 0) + { + if (is_string) + memcpy (av->string, v->s, src_width); + else + av->dbl = v->f; + av->int1 = 1; + } + break; + + case AGRF_LAST: + if (is_string) + memcpy (av->string, v->s, src_width); + else + av->dbl = v->f; + av->int1 = 1; + break; + + case AGRF_NMISS: + case AGRF_NUMISS: + /* Our value is not missing or it would have been + caught earlier. Nothing to do. */ + break; + } + } + else + { + av->W += weight; + switch (av->function) + { + case AGRF_N: + break; + + case AGRF_NU: + av->int1++; + break; + + case AGRF_SUM: + case AGRF_MEAN: + case AGRF_MEDIAN: + case AGRF_SD: + case AGRF_MAX: + case AGRF_MIN: + case AGRF_PGT: + case AGRF_PLT: + case AGRF_PIN: + case AGRF_POUT: + case AGRF_FGT: + case AGRF_FLT: + case AGRF_FIN: + case AGRF_FOUT: + case AGRF_CGT: + case AGRF_CLT: + case AGRF_CIN: + case AGRF_COUT: + case AGRF_NMISS: + case AGRF_NUMISS: + case AGRF_FIRST: + case AGRF_LAST: + NOT_REACHED (); + } + } } } /* Writes an aggregated record to OUTPUT. */ static void -dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) +dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output, const struct ccase *break_case) { struct ccase *c = case_create (dict_get_proto (agr->dict)); - { - int value_idx = 0; - int i; - - for (i = 0; i < agr->break_var_cnt; i++) - { - const struct variable *v = agr->break_vars[i]; - value_copy (case_data_rw_idx (c, value_idx), - case_data (agr->break_case, v), - var_get_width (v)); - value_idx++; - } - } + if (agr->add_variables) + { + case_copy (c, 0, break_case, 0, dict_get_n_vars (agr->src_dict)); + } + else + { + int value_idx = 0; - { - struct agr_var *i; + for (size_t i = 0; i < agr->break_n_vars; i++) + { + const struct variable *v = agr->break_vars[i]; + value_copy (case_data_rw_idx (c, value_idx), + case_data (break_case, v), + var_get_width (v)); + value_idx++; + } + } - for (i = agr->agr_vars; i; i = i->next) - { - union value *v = case_data_rw (c, i->dest); - int width = var_get_width (i->dest); + for (size_t i = 0; i < agr->n_agr_vars; i++) + { + struct agr_var *av = &agr->agr_vars[i]; + union value *v = case_data_rw (c, av->dest); + int width = var_get_width (av->dest); + + if (agr->missing == COLUMNWISE && av->saw_missing + && av->function != AGRF_N + && av->function != AGRF_NU + && av->function != AGRF_NMISS + && av->function != AGRF_NUMISS) + { + value_set_missing (v, width); + casewriter_destroy (av->writer); + continue; + } - if (agr->missing == COLUMNWISE && i->saw_missing - && (i->function & FUNC) != N && (i->function & FUNC) != NU - && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS) - { - value_set_missing (v, width); - casewriter_destroy (i->writer); - continue; - } + switch (av->function) + { + case AGRF_SUM: + v->f = av->int1 ? av->dbl : SYSMIS; + break; - switch (i->function) - { - case SUM: - v->f = i->int1 ? i->dbl[0] : SYSMIS; - break; - case MEAN: - v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS; - break; - case MEDIAN: - { - struct casereader *sorted_reader; - struct percentile *median = percentile_create (0.5, i->cc); - struct order_stats *os = &median->parent; + case AGRF_MEAN: + v->f = av->W != 0.0 ? av->dbl / av->W : SYSMIS; + break; - sorted_reader = casewriter_make_reader (i->writer); + case AGRF_MEDIAN: + { + if (av->writer) + { + struct percentile *median = percentile_create (0.5, av->W); + struct order_stats *os = &median->parent; + struct casereader *sorted_reader = casewriter_make_reader (av->writer); + av->writer = NULL; + + order_stats_accumulate (&os, 1, + sorted_reader, + av->weight, + av->subject, + av->exclude); + av->dbl = percentile_calculate (median, PC_HAVERAGE); + statistic_destroy (&median->parent.parent); + } + v->f = av->dbl; + } + break; - order_stats_accumulate (&os, 1, - sorted_reader, - i->weight, - i->subject, - i->exclude); + case AGRF_SD: + { + double variance; - v->f = percentile_calculate (median, PC_HAVERAGE); + moments1_calculate (av->moments, NULL, NULL, &variance, + NULL, NULL); + v->f = variance != SYSMIS ? sqrt (variance) : SYSMIS; + } + break; - statistic_destroy (&median->parent.parent); - } - break; - case SD: + case AGRF_MAX: + case AGRF_MIN: + case AGRF_FIRST: + case AGRF_LAST: + if (!width) + v->f = av->int1 ? av->dbl : SYSMIS; + else { - double variance; - - /* FIXME: we should use two passes. */ - moments1_calculate (i->moments, NULL, NULL, &variance, - NULL, NULL); - if (variance != SYSMIS) - v->f = sqrt (variance); + if (av->int1) + memcpy (v->s, av->string, width); else - v->f = SYSMIS; + value_set_missing (v, width); } - break; - case MAX: - case MIN: - v->f = i->int1 ? i->dbl[0] : SYSMIS; - break; - case MAX | FSTRING: - case MIN | FSTRING: - if (i->int1) - memcpy (value_str_rw (v, width), i->string, width); - else - value_set_missing (v, width); - break; - case FGT: - case FGT | FSTRING: - case FLT: - case FLT | FSTRING: - case FIN: - case FIN | FSTRING: - case FOUT: - case FOUT | FSTRING: - v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS; - break; - case PGT: - case PGT | FSTRING: - case PLT: - case PLT | FSTRING: - case PIN: - case PIN | FSTRING: - case POUT: - case POUT | FSTRING: - v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS; - break; - case N: - case N | FSTRING: - v->f = i->dbl[0]; - break; - case NU: - case NU | FSTRING: - v->f = i->int1; - break; - case FIRST: - case LAST: - v->f = i->int1 ? i->dbl[0] : SYSMIS; - break; - case FIRST | FSTRING: - case LAST | FSTRING: - if (i->int1) - memcpy (value_str_rw (v, width), i->string, width); - else - value_set_missing (v, width); - break; - case N_NO_VARS: - v->f = i->dbl[0]; - break; - case NU_NO_VARS: - v->f = i->int1; - break; - case NMISS: - case NMISS | FSTRING: - v->f = i->dbl[0]; - break; - case NUMISS: - case NUMISS | FSTRING: - v->f = i->int1; - break; - default: - NOT_REACHED (); - } - } - } + break; + + case AGRF_FGT: + case AGRF_FLT: + case AGRF_FIN: + case AGRF_FOUT: + v->f = av->W ? av->dbl / av->W : SYSMIS; + break; + + case AGRF_PGT: + case AGRF_PLT: + case AGRF_PIN: + case AGRF_POUT: + v->f = av->W ? av->dbl / av->W * 100.0 : SYSMIS; + break; + + case AGRF_CGT: + case AGRF_CLT: + case AGRF_CIN: + case AGRF_COUT: + v->f = av->dbl; + break; + + case AGRF_N: + v->f = av->W; + break; + + case AGRF_NU: + case AGRF_NUMISS: + v->f = av->int1; + break; + + case AGRF_NMISS: + v->f = av->dbl; + break; + } + } casewriter_write (output, c); } /* Resets the state for all the aggregate functions. */ static void -initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input) +initialize_aggregate_info (struct agr_proc *agr) { - struct agr_var *iter; - - case_unref (agr->break_case); - agr->break_case = case_ref (input); - - for (iter = agr->agr_vars; iter; iter = iter->next) + for (size_t i = 0; i < agr->n_agr_vars; i++) { - iter->saw_missing = false; - iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0; - iter->int1 = iter->int2 = 0; - switch (iter->function) + struct agr_var *av = &agr->agr_vars[i]; + av->saw_missing = false; + av->dbl = av->W = 0.0; + av->int1 = 0; + + int width = av->src ? var_get_width (av->src) : 0; + switch (av->function) { - case MIN: - iter->dbl[0] = DBL_MAX; - break; - case MIN | FSTRING: - memset (iter->string, 255, var_get_width (iter->src)); - break; - case MAX: - iter->dbl[0] = -DBL_MAX; + case AGRF_MIN: + if (!width) + av->dbl = DBL_MAX; + else + memset (av->string, 255, width); break; - case MAX | FSTRING: - memset (iter->string, 0, var_get_width (iter->src)); + + case AGRF_MAX: + if (!width) + av->dbl = -DBL_MAX; + else + memset (av->string, 0, width); break; - case MEDIAN: - { - struct caseproto *proto; - struct subcase ordering; - proto = caseproto_create (); + case AGRF_MEDIAN: + { + struct caseproto *proto = caseproto_create (); proto = caseproto_add_width (proto, 0); proto = caseproto_add_width (proto, 0); - if ( ! iter->subject) - iter->subject = dict_create_internal_var (0, 0); + if (! av->subject) + av->subject = dict_create_internal_var (0, 0); - if ( ! iter->weight) - iter->weight = dict_create_internal_var (1, 0); + if (! av->weight) + av->weight = dict_create_internal_var (1, 0); - subcase_init_var (&ordering, iter->subject, SC_ASCEND); - iter->writer = sort_create_writer (&ordering, proto); - subcase_destroy (&ordering); + struct subcase ordering; + subcase_init_var (&ordering, av->subject, SC_ASCEND); + av->writer = sort_create_writer (&ordering, proto); + subcase_uninit (&ordering); caseproto_unref (proto); - - iter->cc = 0; } break; - case SD: - if (iter->moments == NULL) - iter->moments = moments1_create (MOMENT_VARIANCE); + + case AGRF_SD: + if (av->moments == NULL) + av->moments = moments1_create (MOMENT_VARIANCE); else - moments1_clear (iter->moments); + moments1_clear (av->moments); break; - default: + + case AGRF_SUM: + case AGRF_MEAN: + case AGRF_PGT: + case AGRF_PLT: + case AGRF_PIN: + case AGRF_POUT: + case AGRF_FGT: + case AGRF_FLT: + case AGRF_FIN: + case AGRF_FOUT: + case AGRF_CGT: + case AGRF_CLT: + case AGRF_CIN: + case AGRF_COUT: + case AGRF_N: + case AGRF_NU: + case AGRF_NMISS: + case AGRF_NUMISS: + case AGRF_FIRST: + case AGRF_LAST: break; } }