1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "language/stats/aggregate.h"
23 #include "data/any-writer.h"
24 #include "data/case.h"
25 #include "data/casegrouper.h"
26 #include "data/casereader.h"
27 #include "data/casewriter.h"
28 #include "data/dataset.h"
29 #include "data/dictionary.h"
30 #include "data/file-handle-def.h"
31 #include "data/format.h"
32 #include "data/settings.h"
33 #include "data/subcase.h"
34 #include "data/sys-file-writer.h"
35 #include "data/variable.h"
36 #include "language/command.h"
37 #include "language/data-io/file-handle.h"
38 #include "language/lexer/lexer.h"
39 #include "language/lexer/variable-parser.h"
40 #include "language/stats/sort-criteria.h"
41 #include "libpspp/assertion.h"
42 #include "libpspp/i18n.h"
43 #include "libpspp/message.h"
44 #include "libpspp/misc.h"
45 #include "libpspp/pool.h"
46 #include "libpspp/str.h"
47 #include "math/moments.h"
48 #include "math/percentiles.h"
49 #include "math/sort.h"
50 #include "math/statistic.h"
52 #include "gl/minmax.h"
53 #include "gl/xalloc.h"
/* Translation helpers.  _() passes MSGID to gettext().  N_() expands to MSGID
   unchanged; it wraps the descriptions in agr_func_tab below. */
56 #define _(msgid) gettext (msgid)
57 #define N_(msgid) msgid
59 /* Argument for AGGREGATE function.  parse_aggregate_functions() stores
   numeric arguments in F and string arguments in C, choosing the member by
   the type of the source variable. */
62 double f; /* Numeric. */
63 char *c; /* Short or long string.  Each aggregate variable gets its own xstrdup() copy, which agr_destroy() frees. */
66 /* Specifies how to make an aggregate variable.  parse_aggregate_functions()
   allocates one of these per target variable and links it into the list
   headed by agr_vars in struct agr_proc. */
69 struct agr_var *next; /* Next in list. */
71 /* Collected during parsing. */
72 const struct variable *src; /* Source variable. */
73 struct variable *dest; /* Target variable. */
74 int function; /* Function: offset of the function in agr_func_tab, ORed with FSTRING when the source variable is a string. */
75 enum mv_class exclude; /* Classes of missing values to exclude. */
76 union agr_argument arg[2]; /* Arguments. */
78 /* Accumulated during AGGREGATE execution. */
83 struct moments1 *moments; /* Created on first use by initialize_aggregate_info(); supplies the variance whose square root dump_aggregate_info() stores. */
86 struct variable *subject; /* Internal variable addressing the value in cases written to WRITER. */
87 struct variable *weight; /* Internal variable addressing the case weight in cases written to WRITER. */
88 struct casewriter *writer; /* Sorting writer, ascending on SUBJECT, whose contents feed the median computation in dump_aggregate_info(). */
92 /* Attributes of aggregation functions.

   Each row lists, in order: the function name that
   parse_aggregate_functions() compares case-insensitively with the syntax, a
   description marked with N_(), an AGR_SV_* constant (read as src_vars), the
   number of arguments (read as n_args), the value read as alpha_type when
   the target variable is created, and the output format (read as format).

   The position of a row is the function code: parse_aggregate_functions()
   computes func_index as the offset of the matching row, and agr_destroy()
   indexes this table with function & FUNC.  Keep the row order in step with
   the function constants (SUM, MEAN, SD, ...) used elsewhere in this file.

   The final row, whose name is NULL, terminates the lookup loop. */
93 const struct agr_func agr_func_tab[] =
95 {"SUM", N_("Sum of values"), AGR_SV_YES, 0, -1, {FMT_F, 8, 2}},
96 {"MEAN", N_("Mean average"), AGR_SV_YES, 0, -1, {FMT_F, 8, 2}},
97 {"MEDIAN", N_("Median average"), AGR_SV_YES, 0, -1, {FMT_F, 8, 2}},
98 {"SD", N_("Standard deviation"), AGR_SV_YES, 0, -1, {FMT_F, 8, 2}},
99 {"MAX", N_("Maximum value"), AGR_SV_YES, 0, VAL_STRING, {-1, -1, -1}},
100 {"MIN", N_("Minimum value"), AGR_SV_YES, 0, VAL_STRING, {-1, -1, -1}},
101 {"PGT", N_("Percentage greater than"), AGR_SV_YES, 1, VAL_NUMERIC, {FMT_F, 5, 1}},
102 {"PLT", N_("Percentage less than"), AGR_SV_YES, 1, VAL_NUMERIC, {FMT_F, 5, 1}},
103 {"PIN", N_("Percentage included in range"), AGR_SV_YES, 2, VAL_NUMERIC, {FMT_F, 5, 1}},
104 {"POUT", N_("Percentage excluded from range"), AGR_SV_YES, 2, VAL_NUMERIC, {FMT_F, 5, 1}},
105 {"FGT", N_("Fraction greater than"), AGR_SV_YES, 1, VAL_NUMERIC, {FMT_F, 5, 3}},
106 {"FLT", N_("Fraction less than"), AGR_SV_YES, 1, VAL_NUMERIC, {FMT_F, 5, 3}},
107 {"FIN", N_("Fraction included in range"), AGR_SV_YES, 2, VAL_NUMERIC, {FMT_F, 5, 3}},
108 {"FOUT", N_("Fraction excluded from range"), AGR_SV_YES, 2, VAL_NUMERIC, {FMT_F, 5, 3}},
109 {"N", N_("Number of cases"), AGR_SV_NO, 0, VAL_NUMERIC, {FMT_F, 7, 0}},
110 {"NU", N_("Number of cases (unweighted)"), AGR_SV_OPT, 0, VAL_NUMERIC, {FMT_F, 7, 0}},
111 {"NMISS", N_("Number of missing values"), AGR_SV_YES, 0, VAL_NUMERIC, {FMT_F, 7, 0}},
112 {"NUMISS", N_("Number of missing values (unweighted)"), AGR_SV_YES, 0, VAL_NUMERIC, {FMT_F, 7, 0}},
113 {"FIRST", N_("First non-missing value"), AGR_SV_YES, 0, VAL_STRING, {-1, -1, -1}},
114 {"LAST", N_("Last non-missing value"), AGR_SV_YES, 0, VAL_STRING, {-1, -1, -1}},
115 {NULL, NULL, AGR_SV_NO, 0, -1, {-1, -1, -1}},
118 /* Missing value types.  cmd_aggregate() starts with ITEMWISE and selects
   COLUMNWISE when the MISSING subcommand is given. */
119 enum missing_treatment
121 ITEMWISE, /* Missing values item by item. */
122 COLUMNWISE /* Missing values column by column: dump_aggregate_info() sets a result to missing when a missing value was seen, except for N, NU, NMISS and NUMISS. */
125 /* An entire AGGREGATE procedure.  cmd_aggregate() zero-fills one of these
   and then populates it while parsing; agr_destroy() releases it. */
128 /* Break variables. */
129 struct subcase sort; /* Sort criteria (break variables). */
130 const struct variable **break_vars; /* Break variables. */
131 size_t break_var_cnt; /* Number of break variables. */
133 enum missing_treatment missing; /* How to treat missing values. */
134 struct agr_var *agr_vars; /* First aggregate variable. */
135 struct dictionary *dict; /* Aggregate dictionary. */
136 const struct dictionary *src_dict; /* Dict of the source; used for case weights and, in ADDVARIABLES mode, for the number of values to copy. */
137 int case_cnt; /* Counts aggregated cases. */
139 bool add_variables; /* True iff the aggregated variables should
140 be appended to the existing dictionary */
/* Forward declarations of the static helpers that cmd_aggregate() calls and
   that are defined further down in this file. */
143 static void initialize_aggregate_info (struct agr_proc *);
145 static void accumulate_aggregate_info (struct agr_proc *,
146 const struct ccase *);
148 static bool parse_aggregate_functions (struct lexer *, const struct dictionary *,
150 static void agr_destroy (struct agr_proc *);
151 static void dump_aggregate_info (const struct agr_proc *agr,
152 struct casewriter *output,
153 const struct ccase *break_case);
157 /* Parses and executes the AGGREGATE procedure.

   LEXER supplies the command syntax and DS is the dataset whose cases are
   aggregated.  Parsing follows a fixed order: OUTFILE (an asterisk selects
   the active dataset, anything else is parsed as a file handle), an optional
   MODE that is only looked for when the output is the active dataset, the
   MISSING, DOCUMENT, PRESORTED and BREAK subcommands, and finally the
   aggregate functions.  The input is then sorted on the break variables
   unless PRESORTED was given, split into groups, and every group is
   accumulated and written to OUTPUT.

   The failure path at the end destroys OUTPUT and returns
   CMD_CASCADING_FAILURE. */
159 cmd_aggregate (struct lexer *lexer, struct dataset *ds)
161 struct dictionary *dict = dataset_dict (ds); /* Source dictionary. */
163 struct file_handle *out_file = NULL; /* Stays NULL when the output is the active dataset. */
164 struct casereader *input = NULL, *group;
165 struct casegrouper *grouper;
166 struct casewriter *output = NULL;
168 bool copy_documents = false;
169 bool presorted = false;
/* Start from an all-zero procedure state with item-wise missing values. */
173 memset(&agr, 0 , sizeof (agr));
174 agr.missing = ITEMWISE;
176 subcase_init_empty (&agr.sort);
178 /* OUTFILE subcommand must be first. */
179 lex_match (lexer, T_SLASH);
180 if (!lex_force_match_id (lexer, "OUTFILE"))
182 lex_match (lexer, T_EQUALS);
/* An asterisk leaves OUT_FILE as NULL; otherwise a file handle is parsed. */
183 if (!lex_match (lexer, T_ASTERISK))
185 out_file = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
186 if (out_file == NULL)
/* MODE is only looked for when the output is the active dataset. */
190 if (out_file == NULL && lex_match_id (lexer, "MODE"))
192 lex_match (lexer, T_EQUALS);
193 if (lex_match_id (lexer, "ADDVARIABLES"))
195 agr.add_variables = true;
197 /* presorted is assumed in ADDVARIABLES mode */
200 else if (lex_match_id (lexer, "REPLACE"))
202 agr.add_variables = false;
/* The aggregate dictionary is a clone of the source dictionary in
   ADDVARIABLES mode and a newly created one otherwise (presumably the two
   assignments below are the arms of an if/else; the else is not visible in
   this listing). */
208 if ( agr.add_variables )
209 agr.dict = dict_clone (dict);
211 agr.dict = dict_create ();
/* The label and documents of the source dictionary carry over. */
213 dict_set_label (agr.dict, dict_get_label (dict));
214 dict_set_documents (agr.dict, dict_get_documents (dict));
216 /* Read most of the subcommands. */
219 lex_match (lexer, T_SLASH);
221 if (lex_match_id (lexer, "MISSING"))
223 lex_match (lexer, T_EQUALS);
224 if (!lex_match_id (lexer, "COLUMNWISE"))
226 lex_error (lexer, _("expecting %s"), "COLUMNWISE");
229 agr.missing = COLUMNWISE;
231 else if (lex_match_id (lexer, "DOCUMENT"))
232 copy_documents = true;
233 else if (lex_match_id (lexer, "PRESORTED"))
235 else if (lex_force_match_id (lexer, "BREAK"))
239 lex_match (lexer, T_EQUALS);
240 if (!parse_sort_criteria (lexer, dict, &agr.sort, &agr.break_vars,
243 agr.break_var_cnt = subcase_get_n_fields (&agr.sort);
/* Outside ADDVARIABLES mode the aggregate dictionary starts out empty, so
   the break variables are cloned into it. */
245 if (! agr.add_variables)
246 for (i = 0; i < agr.break_var_cnt; i++)
247 dict_clone_var_assert (agr.dict, agr.break_vars[i]);
249 /* BREAK must follow the options. */
/* Sort directions are pointless when the input is not going to be sorted. */
256 if (presorted && saw_direction)
257 msg (SW, _("When PRESORTED is specified, specifying sorting directions "
258 "with (A) or (D) has no effect. Output data will be sorted "
259 "the same way as the input data."));
261 /* Read in the aggregate functions. */
262 lex_match (lexer, T_SLASH);
263 if (!parse_aggregate_functions (lexer, dict, &agr))
266 /* Delete documents.  (Presumably guarded by COPY_DOCUMENTS; the condition
   is not visible in this listing.) */
268 dict_clear_documents (agr.dict);
270 /* Cancel SPLIT FILE. */
271 dict_set_split_vars (agr.dict, NULL, 0);
/* Choose the output: an autopaging writer whose contents later replace the
   active dataset, or a writer opened on the OUTFILE handle. */
276 if (out_file == NULL)
278 /* The active dataset will be replaced by the aggregated data,
279 so TEMPORARY is moot. */
280 proc_cancel_temporary_transformations (ds);
281 proc_discard_output (ds);
282 output = autopaging_writer_create (dict_get_proto (agr.dict));
286 output = any_writer_open (out_file, agr.dict);
/* Open the input and sort it on the break variables unless there are none
   or PRESORTED was given. */
291 input = proc_open (ds);
292 if (!subcase_is_empty (&agr.sort) && !presorted)
294 input = sort_execute (input, &agr.sort);
295 subcase_clear (&agr.sort);
/* One iteration per group of cases with equal break values.  The loop
   increment destroys GROUP. */
298 for (grouper = casegrouper_create_vars (input, agr.break_vars,
300 casegrouper_get_next_group (grouper, &group);
301 casereader_destroy (group))
303 struct casereader *placeholder = NULL;
/* C is the first case of the group without consuming it; it supplies the
   break values when one record per group is written below. */
304 struct ccase *c = casereader_peek (group, 0);
/* NOTE(review): GROUP is also destroyed by the loop increment above.
   Confirm against the full source that the path through this call does not
   reach the increment too. */
308 casereader_destroy (group);
312 initialize_aggregate_info (&agr);
/* ADDVARIABLES needs a second pass over the same cases, so keep a clone of
   the reader before the first pass consumes it. */
314 if ( agr.add_variables )
315 placeholder = casereader_clone (group);
/* First pass: accumulate every case of the group. */
319 for (; (cg = casereader_read (group)) != NULL; case_unref (cg))
320 accumulate_aggregate_info (&agr, cg);
/* ADDVARIABLES writes one record per input case; otherwise (presumably in
   an else branch not visible here) one record per group is written. */
324 if (agr.add_variables)
327 for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg))
328 dump_aggregate_info (&agr, output, cg);
330 casereader_destroy (placeholder);
334 dump_aggregate_info (&agr, output, c);
338 if (!casegrouper_destroy (grouper))
341 if (!proc_commit (ds))
/* When aggregating into the active dataset, the written cases become the
   new source of DS and the aggregate dictionary becomes its dictionary. */
348 if (out_file == NULL)
350 struct casereader *next_input = casewriter_make_reader (output);
351 if (next_input == NULL)
354 dataset_set_dict (ds, agr.dict);
355 dataset_set_source (ds, next_input);
/* Otherwise the writer is destroyed and its result kept in OK. */
360 ok = casewriter_destroy (output);
373 casewriter_destroy (output);
376 return CMD_CASCADING_FAILURE;
379 /* Parse all the aggregate functions.

   Reads aggregate specifications from LEXER until no further slash follows.
   For each specification, in order:
     - the target variable names up to the equals sign, each optionally
       followed by a string that becomes the label of the target named just
       before it;
     - the function name, looked up case-insensitively in agr_func_tab; a
       trailing period selects MV_SYSTEM instead of MV_ANY as the classes of
       missing values to exclude;
     - a parenthesized list of source variables from DICT and, for functions
       with a nonzero n_args, that many string or numeric arguments;
     - one struct agr_var per target variable, linked into the list of AGR,
       with the target variable created in the dictionary of AGR and given
       its label and formats.  For string sources the function code gets the
       FSTRING flag and a buffer as wide as the source variable.  String
       arguments are duplicated per aggregate variable.
   The arguments of PIN, POUT, FIN and FOUT that arrive in descending order
   produce a warning and are treated as if given in ascending order.

   cmd_aggregate() treats a false return value as failure.

   NOTE(review): in this listing the comments opened on the lines numbered
   564 and 579 have lost their closing delimiters.
   NOTE(review): the message on the line numbered 678 is not wrapped in _()
   unlike the other diagnostics in this function. */
381 parse_aggregate_functions (struct lexer *lexer, const struct dictionary *dict,
382 struct agr_proc *agr)
384 struct agr_var *tail; /* Tail of linked list starting at agr->agr_vars. */
386 /* Parse everything. */
393 struct string function_name;
395 enum mv_class exclude;
396 const struct agr_func *function;
399 union agr_argument arg[2];
401 const struct variable **src;
414 ds_init_empty (&function_name);
416 /* Parse the list of target variables. */
417 while (!lex_match (lexer, T_EQUALS))
419 size_t n_dest_prev = n_dest;
421 if (!parse_DATA_LIST_vars (lexer, dict, &dest, &n_dest,
422 (PV_APPEND | PV_SINGLE | PV_NO_SCRATCH
426 /* Assign empty labels. */
430 dest_label = xnrealloc (dest_label, n_dest, sizeof *dest_label);
431 for (j = n_dest_prev; j < n_dest; j++)
432 dest_label[j] = NULL;
/* A string token here labels the most recently parsed target. */
437 if (lex_is_string (lexer))
439 dest_label[n_dest - 1] = xstrdup (lex_tokcstr (lexer));
444 /* Get the name of the aggregation function. */
445 if (lex_token (lexer) != T_ID)
447 lex_error (lexer, _("expecting aggregation function"));
451 ds_assign_substring (&function_name, lex_tokss (lexer));
/* A trailing period is removed from the name and selects MV_SYSTEM. */
452 exclude = ds_chomp_byte (&function_name, '.') ? MV_SYSTEM : MV_ANY;
/* Case-insensitive search; the row with a NULL name ends the table. */
454 for (function = agr_func_tab; function->name; function++)
455 if (!strcasecmp (function->name, ds_cstr (&function_name)))
457 if (NULL == function->name)
459 msg (SE, _("Unknown aggregation function %s."),
460 ds_cstr (&function_name));
463 ds_destroy (&function_name);
/* The offset of the row in the table is the function code. */
464 func_index = function - agr_func_tab;
467 /* Check for leading lparen. */
468 if (!lex_match (lexer, T_LPAREN))
/* Functions whose src_vars is AGR_SV_YES cannot do without it; forcing the
   match reports the missing parenthesis. */
470 if (function->src_vars == AGR_SV_YES)
472 lex_force_match (lexer, T_LPAREN);
478 /* Parse list of source variables. */
480 int pv_opts = PV_NO_SCRATCH;
/* SUM, MEAN and SD accept numeric variables only; functions that take
   arguments need all source variables to be of one type. */
482 if (func_index == SUM || func_index == MEAN || func_index == SD)
483 pv_opts |= PV_NUMERIC;
484 else if (function->n_args)
485 pv_opts |= PV_SAME_TYPE;
487 if (!parse_variables_const (lexer, dict, &src, &n_src, pv_opts))
491 /* Parse function arguments, for those functions that
492 require arguments. */
493 if (function->n_args != 0)
494 for (i = 0; i < function->n_args; i++)
498 lex_match (lexer, T_COMMA);
/* String arguments go through recode_string(), presumably from UTF-8 into
   the encoding of the aggregate dictionary -- TODO confirm the argument
   order of recode_string(). */
499 if (lex_is_string (lexer))
501 arg[i].c = recode_string (dict_get_encoding (agr->dict),
502 "UTF-8", lex_tokcstr (lexer),
506 else if (lex_is_number (lexer))
508 arg[i].f = lex_tokval (lexer);
513 msg (SE, _("Missing argument %zu to %s."),
514 i + 1, function->name);
/* The argument type has to agree with the first source variable. */
520 if (type != var_get_type (src[0]))
522 msg (SE, _("Arguments to %s must be of same type as "
523 "source variables."),
529 /* Trailing rparen. */
530 if (!lex_force_match (lexer, T_RPAREN))
533 /* Now check that the number of source variables match
534 the number of target variables. If we check earlier
535 than this, the user can get very misleading error
536 message, i.e. `AGGREGATE x=SUM(y t).' will get this
537 error message when a proper message would be more
538 like `unknown variable t'. */
541 msg (SE, _("Number of source variables (%zu) does not match "
542 "number of target variables (%zu)."),
/* Range functions: numeric bounds are compared as numbers, string bounds
   with str_compare_rpad(). */
547 if ((func_index == PIN || func_index == POUT
548 || func_index == FIN || func_index == FOUT)
549 && (var_is_numeric (src[0])
550 ? arg[0].f > arg[1].f
551 : str_compare_rpad (arg[0].c, arg[1].c) > 0))
553 union agr_argument t = arg[0];
557 msg (SW, _("The value arguments passed to the %s function "
558 "are out-of-order. They will be treated as if "
559 "they had been specified in the correct order."),
564 /* Finally add these to the linked list of aggregation
566 for (i = 0; i < n_dest; i++)
568 struct agr_var *v = xzalloc (sizeof *v);
570 /* Add variable to chain. */
571 if (agr->agr_vars != NULL)
579 /* Create the target variable in the aggregate
582 struct variable *destvar;
584 v->function = func_index;
590 if (var_is_alpha (src[i]))
592 v->function |= FSTRING;
593 v->string = xmalloc (var_get_width (src[i]));
596 if (function->alpha_type == VAL_STRING)
597 destvar = dict_clone_var_as (agr->dict, v->src, dest[i]);
600 assert (var_is_numeric (v->src)
601 || function->alpha_type == VAL_NUMERIC);
602 destvar = dict_create_var (agr->dict, dest[i], 0);
606 if ((func_index == N || func_index == NMISS)
607 && dict_get_weight (dict) != NULL)
608 f = fmt_for_output (FMT_F, 8, 2);
610 f = function->format;
611 var_set_both_formats (destvar, &f);
617 destvar = dict_create_var (agr->dict, dest[i], 0);
620 if ((func_index == N || func_index == NMISS)
621 && dict_get_weight (dict) != NULL)
622 f = fmt_for_output (FMT_F, 8, 2);
624 f = function->format;
625 var_set_both_formats (destvar, &f);
631 msg (SE, _("Variable name %s is not unique within the "
632 "aggregate file dictionary, which contains "
633 "the aggregate variables and the break "
641 var_set_label (destvar, dest_label[i],
642 dict_get_encoding (agr->dict), true);
647 v->exclude = exclude;
653 if (var_is_numeric (v->src))
654 for (j = 0; j < function->n_args; j++)
655 v->arg[j].f = arg[j].f;
657 for (j = 0; j < function->n_args; j++)
658 v->arg[j].c = xstrdup (arg[j].c);
662 if (src != NULL && var_is_alpha (src[0]))
663 for (i = 0; i < function->n_args; i++)
673 if (!lex_match (lexer, T_SLASH))
675 if (lex_token (lexer) == T_ENDCMD)
678 lex_error (lexer, "expecting end of command");
684 ds_destroy (&function_name);
685 for (i = 0; i < n_dest; i++)
688 free (dest_label[i]);
694 if (src && n_src && var_is_alpha (src[0]))
695 for (i = 0; i < function->n_args; i++)
/* Releases what AGR owns, as far as this listing shows: the sort criteria,
   the break variable array, the per-variable state of every aggregate
   variable, and the aggregate dictionary when one was created. */
708 agr_destroy (struct agr_proc *agr)
710 struct agr_var *iter, *next;
712 subcase_destroy (&agr->sort);
713 free (agr->break_vars);
714 for (iter = agr->agr_vars; iter; iter = next)
/* String variants own heap copies of their arguments, made with xstrdup()
   in parse_aggregate_functions(). */
718 if (iter->function & FSTRING)
/* Masking with FUNC yields the index of the function in agr_func_tab. */
723 n_args = agr_func_tab[iter->function & FUNC].n_args;
724 for (i = 0; i < n_args; i++)
725 free (iter->arg[i].c);
/* The numeric SD function owns a moments object. */
728 else if (iter->function == SD)
729 moments1_destroy (iter->moments);
/* Internal variables created by initialize_aggregate_info(). */
731 dict_destroy_internal_var (iter->subject);
732 dict_destroy_internal_var (iter->weight);
736 if (agr->dict != NULL)
737 dict_destroy (agr->dict);
742 /* Accumulates aggregation data from the case INPUT.

   The case weight is obtained from the source dictionary of AGR.  For every
   aggregate variable the source value is examined.  A value that is missing
   in the classes given by the exclude member only updates the missing-value
   count and sets saw_missing.  Any other value updates the running state
   (dbl, string, moments or writer) according to the function code, where
   FSTRING marks the string variant of a function.

   NOTE(review): most case labels of the switch statements are absent from
   this listing, so the comments below describe the visible statements
   rather than naming the function each one belongs to. */
744 accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input)
746 struct agr_var *iter;
748 bool bad_warn = true;
750 weight = dict_get_case_weight (agr->src_dict, input, &bad_warn);
752 for (iter = agr->agr_vars; iter; iter = iter->next)
755 const union value *v = case_data (input, iter->src);
756 int src_width = var_get_width (iter->src);
/* Missing values: count them where the function asks for that, and note
   that one was seen for column-wise handling in dump_aggregate_info(). */
758 if (var_is_value_missing (iter->src, v, iter->exclude))
760 switch (iter->function)
763 case NMISS | FSTRING:
764 iter->dbl[0] += weight; /* Weighted count of missing values. */
767 case NUMISS | FSTRING:
771 iter->saw_missing = true;
775 /* This is horrible. There are too many possibilities. */
776 switch (iter->function)
/* Weighted sum in dbl[0]. */
779 iter->dbl[0] += v->f * weight;
/* Weighted sum in dbl[0] and total weight in dbl[1]. */
783 iter->dbl[0] += v->f * weight;
784 iter->dbl[1] += weight;
/* Append a (value, weight) case to the sorting writer of this variable. */
791 cout = case_create (casewriter_get_proto (iter->writer));
793 case_data_rw (cout, iter->subject)->f
794 = case_data (input, iter->src)->f;
796 wv = dict_get_case_weight (agr->src_dict, input, NULL);
798 case_data_rw (cout, iter->weight)->f = wv;
802 casewriter_write (iter->writer, cout);
/* Feed the value and its weight to the moments accumulator. */
806 moments1_add (iter->moments, v->f, weight);
/* Running numeric maximum. */
809 iter->dbl[0] = MAX (iter->dbl[0], v->f);
813 /* Need to do some kind of Unicode collation thingy here */
/* Running string maximum, compared bytewise over the source width. */
814 if (memcmp (iter->string, value_str (v, src_width), src_width) < 0)
815 memcpy (iter->string, value_str (v, src_width), src_width);
/* Running numeric minimum. */
819 iter->dbl[0] = MIN (iter->dbl[0], v->f);
/* Running string minimum, compared bytewise over the source width. */
823 if (memcmp (iter->string, value_str (v, src_width), src_width) > 0)
824 memcpy (iter->string, value_str (v, src_width), src_width);
/* In the comparisons that follow, dbl[0] collects the weight of the values
   that satisfy the condition and dbl[1] the weight of all values seen. */
/* Numeric value greater than the first argument. */
829 if (v->f > iter->arg[0].f)
830 iter->dbl[0] += weight;
831 iter->dbl[1] += weight;
/* String value greater than the first argument. */
835 if (memcmp (iter->arg[0].c,
836 value_str (v, src_width), src_width) < 0)
837 iter->dbl[0] += weight;
838 iter->dbl[1] += weight;
/* Numeric value less than the first argument. */
842 if (v->f < iter->arg[0].f)
843 iter->dbl[0] += weight;
844 iter->dbl[1] += weight;
/* String value less than the first argument. */
848 if (memcmp (iter->arg[0].c,
849 value_str (v, src_width), src_width) > 0)
850 iter->dbl[0] += weight;
851 iter->dbl[1] += weight;
/* Numeric value inside the closed range given by the two arguments. */
855 if (iter->arg[0].f <= v->f && v->f <= iter->arg[1].f)
856 iter->dbl[0] += weight;
857 iter->dbl[1] += weight;
/* String value inside the closed range given by the two arguments. */
861 if (memcmp (iter->arg[0].c,
862 value_str (v, src_width), src_width) <= 0
863 && memcmp (iter->arg[1].c,
864 value_str (v, src_width), src_width) >= 0)
865 iter->dbl[0] += weight;
866 iter->dbl[1] += weight;
/* Numeric value outside the range given by the two arguments. */
870 if (iter->arg[0].f > v->f || v->f > iter->arg[1].f)
871 iter->dbl[0] += weight;
872 iter->dbl[1] += weight;
/* String value outside the range given by the two arguments. */
876 if (memcmp (iter->arg[0].c,
877 value_str (v, src_width), src_width) > 0
878 || memcmp (iter->arg[1].c,
879 value_str (v, src_width), src_width) < 0)
880 iter->dbl[0] += weight;
881 iter->dbl[1] += weight;
/* Weighted count in dbl[0]. */
885 iter->dbl[0] += weight;
898 case FIRST | FSTRING:
/* Copy the string value into the buffer of this variable. */
901 memcpy (iter->string, value_str (v, src_width), src_width);
910 memcpy (iter->string, value_str (v, src_width), src_width);
914 case NMISS | FSTRING:
916 case NUMISS | FSTRING:
917 /* Our value is not missing or it would have been
918 caught earlier. Nothing to do. */
/* A further switch on the function code; its case labels are not visible
   in this listing.  The visible statement adds the weight to dbl[0]. */
924 switch (iter->function)
927 iter->dbl[0] += weight;
938 /* Writes an aggregated record to OUTPUT.

   A new case is created from the prototype of the aggregate dictionary.  In
   ADDVARIABLES mode the values of the source-dictionary variables are
   copied from BREAK_CASE; otherwise the break variable values are copied
   from it.  The result of every aggregate variable is then stored in its
   target variable and the case is written to OUTPUT.

   NOTE(review): most case labels of the switch are absent from this
   listing, so the comments below describe the visible statements. */
940 dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output, const struct ccase *break_case)
942 struct ccase *c = case_create (dict_get_proto (agr->dict));
944 if ( agr->add_variables)
/* Copy as many values as the source dictionary has variables, from the
   start of BREAK_CASE to the start of the new case. */
946 case_copy (c, 0, break_case, 0, dict_get_var_cnt (agr->src_dict));
/* Copy the value of each break variable from BREAK_CASE. */
953 for (i = 0; i < agr->break_var_cnt; i++)
955 const struct variable *v = agr->break_vars[i];
956 value_copy (case_data_rw_idx (c, value_idx),
957 case_data (break_case, v),
/* Store the result of each aggregate variable. */
966 for (i = agr->agr_vars; i; i = i->next)
968 union value *v = case_data_rw (c, i->dest);
969 int width = var_get_width (i->dest);
/* Column-wise missing handling: once a missing value was seen the result is
   missing, except for the four counting functions. */
971 if (agr->missing == COLUMNWISE && i->saw_missing
972 && (i->function & FUNC) != N && (i->function & FUNC) != NU
973 && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS)
975 value_set_missing (v, width);
976 casewriter_destroy (i->writer);
/* dbl[0] when int1 is nonzero, otherwise system-missing. */
983 v->f = i->int1 ? i->dbl[0] : SYSMIS;
/* Ratio of dbl[0] to dbl[1], or system-missing when dbl[1] is zero. */
986 v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS;
/* Median: a 0.5 percentile computed over the sorted contents of the
   writer of this variable. */
992 struct percentile *median = percentile_create (0.5, i->cc);
993 struct order_stats *os = &median->parent;
994 struct casereader *sorted_reader = casewriter_make_reader (i->writer);
997 order_stats_accumulate (&os, 1,
1002 i->dbl[0] = percentile_calculate (median, PC_HAVERAGE);
1003 statistic_destroy (&median->parent.parent);
1012 /* FIXME: we should use two passes. */
/* Square root of the variance from the moments accumulator. */
1013 moments1_calculate (i->moments, NULL, NULL, &variance,
1015 if (variance != SYSMIS)
1016 v->f = sqrt (variance);
1023 v->f = i->int1 ? i->dbl[0] : SYSMIS;
/* Either the accumulated string or a missing value (presumably the arms of
   an if/else that is not visible in this listing). */
1028 memcpy (value_str_rw (v, width), i->string, width);
1030 value_set_missing (v, width);
1039 case FOUT | FSTRING:
/* Fraction: dbl[0] over dbl[1], or system-missing when dbl[1] is zero. */
1040 v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS;
1049 case POUT | FSTRING:
/* Percentage: the same ratio multiplied by 100. */
1050 v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS;
1062 v->f = i->int1 ? i->dbl[0] : SYSMIS;
1064 case FIRST | FSTRING:
1065 case LAST | FSTRING:
1067 memcpy (value_str_rw (v, width), i->string, width);
1069 value_set_missing (v, width);
1072 case NMISS | FSTRING:
1076 case NUMISS | FSTRING:
1085 casewriter_write (output, c);
1088 /* Resets the state for all the aggregate functions.

   cmd_aggregate() calls this once per group, before the cases of the group
   are accumulated.  The flags and counters of every aggregate variable are
   cleared, then function-specific state is set up.

   NOTE(review): the case labels of the switch are absent from this listing,
   so the comments below describe the visible statements. */
1090 initialize_aggregate_info (struct agr_proc *agr)
1092 struct agr_var *iter;
1094 for (iter = agr->agr_vars; iter; iter = iter->next)
1096 iter->saw_missing = false;
1097 iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0;
1098 iter->int1 = iter->int2 = 0;
1099 switch (iter->function)
/* Largest finite double: presumably the seed for a running minimum. */
1102 iter->dbl[0] = DBL_MAX;
/* Buffer filled with bytes of value 255 over the source width: presumably
   the seed for a running string minimum. */
1105 memset (iter->string, 255, var_get_width (iter->src));
/* Most negative finite double: presumably the seed for a running maximum. */
1108 iter->dbl[0] = -DBL_MAX;
/* Buffer filled with zero bytes over the source width: presumably the seed
   for a running string maximum. */
1111 memset (iter->string, 0, var_get_width (iter->src));
/* Set up the sorting writer of this variable: a case prototype with two
   columns of width 0, internal variables created on first use, and
   ascending order on SUBJECT. */
1115 struct caseproto *proto;
1116 struct subcase ordering;
1118 proto = caseproto_create ();
1119 proto = caseproto_add_width (proto, 0);
1120 proto = caseproto_add_width (proto, 0);
1122 if ( ! iter->subject)
1123 iter->subject = dict_create_internal_var (0, 0);
1125 if ( ! iter->weight)
1126 iter->weight = dict_create_internal_var (1, 0);
1128 subcase_init_var (&ordering, iter->subject, SC_ASCEND);
1129 iter->writer = sort_create_writer (&ordering, proto);
1130 subcase_destroy (&ordering);
1131 caseproto_unref (proto);
/* The moments accumulator is created on first use; moments1_clear() resets
   an existing one (presumably in an else branch not visible here). */
1137 if (iter->moments == NULL)
1138 iter->moments = moments1_create (MOMENT_VARIANCE);
1140 moments1_clear (iter->moments);