1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
27 #include "file-handle.h"
39 #include "debug-print.h"
41 /* Specifies how to make an aggregate variable. */
44 struct agr_var *next; /* Next in list. */
46 /* Collected during parsing. */
47 struct variable *src; /* Source variable. */
48 struct variable *dest; /* Target variable. */
49 int function; /* Function. */
50 int include_missing; /* 1=Include user-missing values. */
51 union value arg[2]; /* Arguments. */
53 /* Accumulated during AGGREGATE execution. */
60 /* Aggregation functions. */
63 NONE, SUM, MEAN, SD, MAX, MIN, PGT, PLT, PIN, POUT, FGT, FLT, FIN,
64 FOUT, N, NU, NMISS, NUMISS, FIRST, LAST,
65 N_AGR_FUNCS, N_NO_VARS, NU_NO_VARS,
66 FUNC = 0x1f, /* Function mask. */
67 FSTRING = 1<<5, /* String function bit. */
68 FWEIGHT = 1<<6, /* Weighted function bit. */
69 FOPTIONS = FSTRING | FWEIGHT /* Function options mask. */
72 /* Attributes of an aggregation function. */
75 const char *name; /* Aggregation function name. */
76 int n_args; /* Number of arguments. */
77 int alpha_type; /* When given ALPHA arguments, output type. */
78 struct fmt_spec format; /* Format spec if alpha_type != ALPHA. */
81 /* Attributes of aggregation functions. */
82 static struct agr_func agr_func_tab[] =
84 {"<NONE>", 0, -1, {0, 0, 0}},
85 {"SUM", 0, -1, {FMT_F, 8, 2}},
86 {"MEAN", 0, -1, {FMT_F, 8, 2}},
87 {"SD", 0, -1, {FMT_F, 8, 2}},
88 {"MAX", 0, ALPHA, {-1, -1, -1}},
89 {"MIN", 0, ALPHA, {-1, -1, -1}},
90 {"PGT", 1, NUMERIC, {FMT_F, 5, 1}},
91 {"PLT", 1, NUMERIC, {FMT_F, 5, 1}},
92 {"PIN", 2, NUMERIC, {FMT_F, 5, 1}},
93 {"POUT", 2, NUMERIC, {FMT_F, 5, 1}},
94 {"FGT", 1, NUMERIC, {FMT_F, 5, 3}},
95 {"FLT", 1, NUMERIC, {FMT_F, 5, 3}},
96 {"FIN", 2, NUMERIC, {FMT_F, 5, 3}},
97 {"FOUT", 2, NUMERIC, {FMT_F, 5, 3}},
98 {"N", 0, NUMERIC, {FMT_F, 7, 0}},
99 {"NU", 0, NUMERIC, {FMT_F, 7, 0}},
100 {"NMISS", 0, NUMERIC, {FMT_F, 7, 0}},
101 {"NUMISS", 0, NUMERIC, {FMT_F, 7, 0}},
102 {"FIRST", 0, ALPHA, {-1, -1, -1}},
103 {"LAST", 0, ALPHA, {-1, -1, -1}},
104 {NULL, 0, -1, {-1, -1, -1}},
105 {"N", 0, NUMERIC, {FMT_F, 7, 0}},
106 {"NU", 0, NUMERIC, {FMT_F, 7, 0}},
109 /* Output file, or NULL for the active file. */
110 static struct file_handle *outfile;
112 /* Missing value types. */
115 ITEMWISE, /* Missing values item by item. */
116 COLUMNWISE /* Missing values column by column. */
119 /* ITEMWISE or COLUMNWISE. */
122 /* Aggregate variables. */
123 static struct agr_var *agr_first, *agr_next;
125 /* Aggregate dictionary. */
126 static struct dictionary *agr_dict;
128 /* Number of cases passed through aggregation. */
129 static int case_count;
131 /* Last values of the break variables. */
132 static union value *prev_case;
134 /* Buffers for use by the 1xx cases (10x and 11x), which write to an external output file. */
135 static flt64 *buf64_1xx;
136 static struct ccase *buf_1xx;
138 static void initialize_aggregate_info (void);
141 static int parse_aggregate_functions (void);
142 static void free_aggregate_functions (void);
143 static int aggregate_single_case (struct ccase *input, struct ccase *output);
144 static int create_sysfile (void);
146 static int agr_00x_trns_proc (struct trns_header *, struct ccase *);
147 static void agr_00x_end_func (void);
148 static int agr_10x_trns_proc (struct trns_header *, struct ccase *);
149 static void agr_10x_trns_free (struct trns_header *);
150 static void agr_10x_end_func (void);
151 static int agr_11x_func (void);
154 static void debug_print (int flags);
159 /* Parses and executes the AGGREGATE procedure. */
164 int parse_sort_variables (void);
166 /* Have we seen these subcommands? */
174 agr_dict = new_dictionary (1);
176 lex_match_id ("AGGREGATE");
178 /* Read most of the subcommands. */
183 if (lex_match_id ("OUTFILE"))
188 free_dictionary (agr_dict);
189 msg (SE, _("OUTFILE specified multiple times."));
199 outfile = fh_parse_file_handle ();
203 free_dictionary (agr_dict);
208 else if (lex_match_id ("MISSING"))
211 if (!lex_match_id ("COLUMNWISE"))
214 free_dictionary (agr_dict);
215 lex_error (_("while expecting COLUMNWISE"));
218 missing = COLUMNWISE;
220 else if (lex_match_id ("DOCUMENT"))
222 else if (lex_match_id ("PRESORTED"))
224 else if (lex_match_id ("BREAK"))
229 free_dictionary (agr_dict);
230 msg (SE, _("BREAK specified multiple times."));
236 if (!parse_sort_variables ())
238 free_dictionary (agr_dict);
245 for (i = 0; i < nv_sort; i++)
249 v = dup_variable (agr_dict, v_sort[i], v_sort[i]->name);
257 /* Check for proper syntax. */
259 msg (SW, _("BREAK subcommand not specified."));
261 /* Read in the aggregate functions. */
262 if (!parse_aggregate_functions ())
264 free_aggregate_functions ();
269 /* Delete documents. */
272 free (agr_dict->documents);
273 agr_dict->documents = NULL;
274 agr_dict->n_documents = 0;
277 /* Cancel SPLIT FILE. */
278 default_dict.n_splits = 0;
279 free (default_dict.splits);
280 default_dict.splits = NULL;
288 initialize_aggregate_info ();
290 /* How to implement all this... There are three important variables:
291 whether output is going to the active file (0) or a separate file
292 (1); whether the input data is presorted (0) or needs sorting
293 (1); whether there is a temporary transformation (1) or not (0).
294 The eight cases are as follows:
296 000 (0): Pass it through an aggregate transformation that
299 001 (1): Cancel the temporary transformation and handle as 000.
301 010 (2): Set up a SORT CASES and aggregate the output, writing
302 the results to the active file.
304 011 (3): Cancel the temporary transformation and handle as 010.
306 100 (4): Pass it through an aggregate transformation that doesn't
307 modify the data but merely writes it to the output file.
309 101 (5): Handled as 100.
311 110 (6): Set up a SORT CASES and capture the output, aggregate
312 it, write it to the output file without modifying the active
315 111 (7): Handled as 110. */
322 if (nv_sort != 0 && (seen & 4) == 0)
342 struct trns_header *t = xmalloc (sizeof *t);
343 t->proc = agr_00x_trns_proc;
345 add_transformation (t);
348 temp_dict = agr_dict;
353 procedure (NULL, NULL, agr_00x_end_func);
360 if (!create_sysfile ())
364 struct trns_header *t = xmalloc (sizeof *t);
365 t->proc = agr_10x_trns_proc;
366 t->free = agr_10x_trns_free;
367 add_transformation (t);
369 procedure (NULL, NULL, agr_10x_end_func);
379 if (!create_sysfile ())
381 read_sort_output (agr_11x_func);
384 struct ccase *save_temp_case = temp_case;
387 temp_case = save_temp_case;
402 free_aggregate_functions ();
410 free_aggregate_functions ();
416 /* Create a system file for use in aggregation to an external file,
417 and allocate temporary buffers for writing out cases. */
419 create_sysfile (void)
421 struct sfm_write_info w;
424 w.compress = set_scompression;
425 if (!sfm_write_dictionary (&w))
427 free_aggregate_functions ();
429 free_dictionary (agr_dict);
433 buf64_1xx = xmalloc (sizeof *buf64_1xx * w.case_size);
434 buf_1xx = xmalloc (sizeof (struct ccase) + sizeof (union value) * (agr_dict->nval - 1));
439 /* Parse all the aggregate functions. */
441 parse_aggregate_functions (void)
443 agr_first = agr_next = NULL;
445 /* Anticipate weighting for optimization later. */
446 update_weighting (&default_dict);
448 /* Parse everything. */
456 struct agr_func *function;
461 struct variable **src;
474 /* Parse the list of target variables. */
475 while (!lex_match ('='))
477 int n_dest_prev = n_dest;
479 if (!parse_DATA_LIST_vars (&dest, &n_dest, PV_APPEND | PV_SINGLE | PV_NO_SCRATCH))
482 /* Assign empty labels. */
486 dest_label = xrealloc (dest_label, sizeof *dest_label * n_dest);
487 for (j = n_dest_prev; j < n_dest; j++)
488 dest_label[j] = NULL;
491 if (token == T_STRING)
493 ds_truncate (&tokstr, 120);
494 dest_label[n_dest - 1] = xstrdup (ds_value (&tokstr));
499 /* Get the name of the aggregation function. */
502 lex_error (_("expecting aggregation function"));
507 if (tokid[strlen (tokid) - 1] == '.')
510 tokid[strlen (tokid) - 1] = 0;
513 for (function = agr_func_tab; function->name; function++)
514 if (!strcmp (function->name, tokid))
516 if (NULL == function->name)
518 msg (SE, _("Unknown aggregation function %s."), tokid);
521 func_index = function - agr_func_tab;
524 /* Check for leading lparen. */
525 if (!lex_match ('('))
528 func_index = N_NO_VARS;
529 else if (func_index == NU)
530 func_index = NU_NO_VARS;
533 lex_error (_("expecting `('"));
537 /* Parse list of source variables. */
539 int pv_opts = PV_NO_SCRATCH;
541 if (func_index == SUM || func_index == MEAN || func_index == SD)
542 pv_opts |= PV_NUMERIC;
543 else if (function->n_args)
544 pv_opts |= PV_SAME_TYPE;
546 if (!parse_variables (&default_dict, &src, &n_src, pv_opts))
550 /* Parse function arguments, for those functions that
551 require arguments. */
552 if (function->n_args != 0)
553 for (i = 0; i < function->n_args; i++)
558 if (token == T_STRING)
560 arg[i].c = xstrdup (ds_value (&tokstr));
563 else if (token == T_NUM)
568 msg (SE, _("Missing argument %d to %s."), i + 1, function->name);
574 if (type != src[0]->type)
576 msg (SE, _("Arguments to %s must be of same type as "
577 "source variables."),
583 /* Trailing rparen. */
586 lex_error (_("expecting `)'"));
590 /* Now check that the number of source variables matches the
591 number of target variables. Do this here because if we
592 do it earlier then the user can get very misleading error
593 messages; e.g., `AGGREGATE x=SUM(y t).' will get this
594 error message when a proper message would be more like
595 `unknown variable t'. */
598 msg (SE, _("Number of source variables (%d) does not match "
599 "number of target variables (%d)."),
605 /* Finally add these to the linked list of aggregation
607 for (i = 0; i < n_dest; i++)
609 struct agr_var *v = xmalloc (sizeof *v);
611 /* Add variable to chain. */
613 agr_next = agr_next->next = v;
615 agr_first = agr_next = v;
616 agr_next->next = NULL;
618 /* Create the target variable in the aggregate
621 struct variable *destvar;
623 agr_next->function = func_index;
629 agr_next->src = src[i];
631 if (src[i]->type == ALPHA)
633 agr_next->function |= FSTRING;
634 agr_next->string = xmalloc (src[i]->width);
637 if (default_dict.weight_index != -1)
638 agr_next->function |= FWEIGHT;
640 if (agr_next->src->type == NUMERIC)
641 output_type = NUMERIC;
643 output_type = function->alpha_type;
645 if (function->alpha_type == ALPHA)
646 destvar = dup_variable (agr_dict, agr_next->src, dest[i]);
649 destvar = create_variable (agr_dict, dest[i], output_type,
650 agr_next->src->width);
651 if (output_type == NUMERIC)
652 destvar->print = destvar->write = function->format;
653 if (output_type == NUMERIC && default_dict.weight_index != -1
654 && (func_index == N || func_index == N_NO_VARS
655 || func_index == NU || func_index == NU_NO_VARS))
657 struct fmt_spec f = {FMT_F, 8, 2};
659 destvar->print = destvar->write = f;
663 agr_next->src = NULL;
664 destvar = create_variable (agr_dict, dest[i], NUMERIC, 0);
669 msg (SE, _("Variable name %s is not unique within the "
670 "aggregate file dictionary, which contains "
671 "the aggregate variables and the break "
681 destvar->label = dest_label[i];
682 dest_label[i] = NULL;
684 else if (function->alpha_type == ALPHA)
685 destvar->print = destvar->write = function->format;
687 agr_next->dest = destvar;
690 agr_next->include_missing = include_missing;
692 if (agr_next->src != NULL)
696 if (agr_next->src->type == NUMERIC)
697 for (j = 0; j < function->n_args; j++)
698 agr_next->arg[j].f = arg[j].f;
700 for (j = 0; j < function->n_args; j++)
701 agr_next->arg[j].c = xstrdup (arg[j].c);
705 if (src != NULL && src[0]->type == ALPHA)
706 for (i = 0; i < function->n_args; i++)
716 if (!lex_match ('/'))
721 lex_error ("expecting end of command");
727 for (i = 0; i < n_dest; i++)
730 free (dest_label[i]);
736 if (src && n_src && src[0]->type == ALPHA)
737 for (i = 0; i < function->n_args; i++)
748 /* Frees all the state for the AGGREGATE procedure. */
750 free_aggregate_functions (void)
752 struct agr_var *iter, *next;
755 free_dictionary (agr_dict);
756 for (iter = agr_first; iter; iter = next)
760 if (iter->function & FSTRING)
765 n_args = agr_func_tab[iter->function & FUNC].n_args;
766 for (i = 0; i < n_args; i++)
767 free (iter->arg[i].c);
776 static void accumulate_aggregate_info (struct ccase *input);
777 static void dump_aggregate_info (struct ccase *output);
779 /* Processes a single case INPUT for aggregation. If output is
780 warranted, it is written to case OUTPUT, which may be (but need not
781 be) an alias to INPUT. Returns -1 when output is performed, -2
783 /* The code in this function has an eerie similarity to
784 vfm.c:SPLIT_FILE_procfunc()... */
786 aggregate_single_case (struct ccase *input, struct ccase *output)
788 /* The first case always begins a new break group. We also need to
789 preserve the values of the case for later comparison. */
790 if (case_count++ == 0)
797 for (i = 0; i < nv_sort; i++)
798 n_elem += v_sort[i]->nv;
801 prev_case = xmalloc (sizeof *prev_case * n_elem);
803 /* Copy INPUT into prev_case. */
805 union value *iter = prev_case;
808 for (i = 0; i < nv_sort; i++)
810 struct variable *v = v_sort[i];
812 if (v->type == NUMERIC)
813 (iter++)->f = input->data[v->fv].f;
816 memcpy (iter->s, input->data[v->fv].s, v->width);
822 accumulate_aggregate_info (input);
827 /* Compare the value of each break variable to the values on the
830 union value *iter = prev_case;
833 for (i = 0; i < nv_sort; i++)
835 struct variable *v = v_sort[i];
840 if (approx_ne (input->data[v->fv].f, iter->f))
845 if (memcmp (input->data[v->fv].s, iter->s, v->width))
855 accumulate_aggregate_info (input);
860 /* The values of the break variable are different from the values on
861 the previous case. That means that it's time to dump aggregate
863 dump_aggregate_info (output);
864 initialize_aggregate_info ();
865 accumulate_aggregate_info (input);
867 /* Copy INPUT into prev_case. */
869 union value *iter = prev_case;
872 for (i = 0; i < nv_sort; i++)
874 struct variable *v = v_sort[i];
876 if (v->type == NUMERIC)
877 (iter++)->f = input->data[v->fv].f;
880 memcpy (iter->s, input->data[v->fv].s, v->width);
889 /* Accumulates aggregation data from the case INPUT. */
891 accumulate_aggregate_info (struct ccase *input)
893 struct agr_var *iter;
895 #define WEIGHT (input->data[default_dict.weight_index].f)
897 for (iter = agr_first; iter; iter = iter->next)
900 union value *v = &input->data[iter->src->fv];
902 if ((!iter->include_missing && is_missing (v, iter->src))
903 || (iter->include_missing && iter->src->type == NUMERIC
906 switch (iter->function)
908 case NMISS | FWEIGHT:
909 iter->dbl[0] += WEIGHT;
913 case NUMISS | FWEIGHT:
921 /* This is horrible. There are too many possibilities. */
922 switch (iter->function)
926 iter->dbl[0] += v->f;
929 iter->dbl[0] += v->f;
935 iter->dbl[0] += v->f * w;
940 iter->dbl[0] += v->f;
941 iter->dbl[1] += v->f * v->f;
947 double product = v->f * w;
948 iter->dbl[0] += product;
949 iter->dbl[1] += product * v->f;
955 iter->dbl[0] = max (iter->dbl[0], v->f);
959 case MAX | FSTRING | FWEIGHT:
960 if (memcmp (iter->string, v->s, iter->src->width) < 0)
961 memcpy (iter->string, v->s, iter->src->width);
966 iter->dbl[0] = min (iter->dbl[0], v->f);
970 case MIN | FSTRING | FWEIGHT:
971 if (memcmp (iter->string, v->s, iter->src->width) > 0)
972 memcpy (iter->string, v->s, iter->src->width);
977 if (approx_gt (v->f, iter->arg[0].f))
985 if (approx_gt (v->f, iter->arg[0].f))
992 if (memcmp (iter->arg[0].c, v->s, iter->src->width) < 0)
996 case FGT | FSTRING | FWEIGHT:
997 case PGT | FSTRING | FWEIGHT:
1000 if (memcmp (iter->arg[0].c, v->s, iter->src->width) < 0)
1007 if (approx_lt (v->f, iter->arg[0].f))
1015 if (approx_lt (v->f, iter->arg[0].f))
1022 if (memcmp (iter->arg[0].c, v->s, iter->src->width) > 0)
1026 case FLT | FSTRING | FWEIGHT:
1027 case PLT | FSTRING | FWEIGHT:
1030 if (memcmp (iter->arg[0].c, v->s, iter->src->width) > 0)
1037 if (approx_in_range (v->f, iter->arg[0].f, iter->arg[1].f))
1045 if (approx_in_range (v->f, iter->arg[0].f, iter->arg[1].f))
1052 if (memcmp (iter->arg[0].c, v->s, iter->src->width) <= 0
1053 && memcmp (iter->arg[1].c, v->s, iter->src->width) >= 0)
1057 case FIN | FSTRING | FWEIGHT:
1058 case PIN | FSTRING | FWEIGHT:
1061 if (memcmp (iter->arg[0].c, v->s, iter->src->width) <= 0
1062 && memcmp (iter->arg[1].c, v->s, iter->src->width) >= 0)
1069 if (!approx_in_range (v->f, iter->arg[0].f, iter->arg[1].f))
1073 case FOUT | FWEIGHT:
1074 case POUT | FWEIGHT:
1077 if (!approx_in_range (v->f, iter->arg[0].f, iter->arg[1].f))
1082 case FOUT | FSTRING:
1083 case POUT | FSTRING:
1084 if (memcmp (iter->arg[0].c, v->s, iter->src->width) > 0
1085 && memcmp (iter->arg[1].c, v->s, iter->src->width) < 0)
1089 case FOUT | FSTRING | FWEIGHT:
1090 case POUT | FSTRING | FWEIGHT:
1093 if (memcmp (iter->arg[0].c, v->s, iter->src->width) > 0
1094 && memcmp (iter->arg[1].c, v->s, iter->src->width) < 0)
1100 iter->dbl[0] += WEIGHT;
1108 case FIRST | FWEIGHT:
1109 if (iter->int1 == 0)
1111 iter->dbl[0] = v->f;
1115 case FIRST | FSTRING:
1116 case FIRST | FSTRING | FWEIGHT:
1117 if (iter->int1 == 0)
1119 memcpy (iter->string, v->s, iter->src->width);
1124 case LAST | FWEIGHT:
1125 iter->dbl[0] = v->f;
1128 case LAST | FSTRING:
1129 case LAST | FSTRING | FWEIGHT:
1130 memcpy (iter->string, v->s, iter->src->width);
1137 switch (iter->function)
1139 case N_NO_VARS | FWEIGHT:
1140 iter->dbl[0] += WEIGHT;
1144 case NU_NO_VARS | FWEIGHT:
1153 /* We've come to a record that differs from the previous in one or
1154 more of the break variables. Make an output record from the
1155 accumulated statistics in the OUTPUT case. */
1157 dump_aggregate_info (struct ccase *output)
1159 debug_printf (("(dumping "));
1167 for (i = 0; i < nv_sort; i++)
1168 n_elem += v_sort[i]->nv;
1170 debug_printf (("n_elem=%d:", n_elem));
1171 memcpy (output->data, prev_case, sizeof (union value) * n_elem);
1177 for (i = agr_first; i; i = i->next)
1179 union value *v = &output->data[i->dest->fv];
1181 debug_printf ((" %d,%d", i->dest->fv, i->dest->nv));
1183 if (missing == COLUMNWISE && i->missing != 0
1184 && (i->function & FUNC) != N && (i->function & FUNC) != NU
1185 && (i->function & FUNC) != NMISS && (i->function & FUNC) != NUMISS)
1187 if (i->function & FSTRING)
1188 memset (v->s, ' ', i->dest->width);
1194 switch (i->function)
1201 v->f = i->int1 ? i->dbl[0] / i->int1 : SYSMIS;
1203 case MEAN | FWEIGHT:
1204 v->f = i->dbl[1] != 0.0 ? i->dbl[0] / i->dbl[1] : SYSMIS;
1207 v->f = ((i->int1 > 1)
1208 ? calc_stddev (calc_variance (i->dbl, i->int1))
1212 v->f = ((i->dbl[2] > 1.0)
1213 ? calc_stddev (calc_variance (i->dbl, i->dbl[2]))
1220 v->f = i->int1 ? i->dbl[0] : SYSMIS;
1223 case MAX | FSTRING | FWEIGHT:
1225 case MIN | FSTRING | FWEIGHT:
1227 memcpy (v->s, i->string, i->dest->width);
1229 memset (v->s, ' ', i->dest->width);
1238 case FOUT | FSTRING:
1239 v->f = i->int2 ? (double) i->int1 / (double) i->int2 : SYSMIS;
1242 case FGT | FSTRING | FWEIGHT:
1244 case FLT | FSTRING | FWEIGHT:
1246 case FIN | FSTRING | FWEIGHT:
1247 case FOUT | FWEIGHT:
1248 case FOUT | FSTRING | FWEIGHT:
1249 v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] : SYSMIS;
1258 case POUT | FSTRING:
1260 ? (double) i->int1 / (double) i->int2 * 100.0
1264 case PGT | FSTRING | FWEIGHT:
1266 case PLT | FSTRING | FWEIGHT:
1268 case PIN | FSTRING | FWEIGHT:
1269 case POUT | FWEIGHT:
1270 case POUT | FSTRING | FWEIGHT:
1271 v->f = i->dbl[1] ? i->dbl[0] / i->dbl[1] * 100.0 : SYSMIS;
1281 case FIRST | FWEIGHT:
1283 case LAST | FWEIGHT:
1284 v->f = i->int1 ? i->dbl[0] : SYSMIS;
1286 case FIRST | FSTRING:
1287 case FIRST | FSTRING | FWEIGHT:
1288 case LAST | FSTRING:
1289 case LAST | FSTRING | FWEIGHT:
1291 memcpy (v->s, i->string, i->dest->width);
1293 memset (v->s, ' ', i->dest->width);
1295 case N_NO_VARS | FWEIGHT:
1300 case NU_NO_VARS | FWEIGHT:
1303 case NMISS | FWEIGHT:
1308 case NUMISS | FWEIGHT:
1316 debug_printf ((") "));
1319 /* Resets the state for all the aggregate functions. */
1321 initialize_aggregate_info (void)
1323 struct agr_var *iter;
1325 for (iter = agr_first; iter; iter = iter->next)
1327 int plain_function = iter->function & ~FWEIGHT;
1330 switch (plain_function)
1333 iter->dbl[0] = DBL_MAX;
1336 memset (iter->string, 255, iter->src->width);
1339 iter->dbl[0] = -DBL_MAX;
1342 memset (iter->string, 0, iter->src->width);
1345 iter->dbl[0] = iter->dbl[1] = iter->dbl[2] = 0.0;
1346 iter->int1 = iter->int2 = 0;
1352 /* Aggregate each case as it comes through. Cases which aren't needed
1355 agr_00x_trns_proc (struct trns_header *h unused, struct ccase *c)
1357 int code = aggregate_single_case (c, compaction_case);
1358 debug_printf (("%d ", code));
1362 /* Output the last aggregate case. It's okay to call the vfm_sink's
1363 write() method here because end_func is called so soon after all
1364 the cases have been output; very little has been cleaned up at this
1367 agr_00x_end_func (void)
1369 /* Ensure that info for the last break group gets written to the
1371 dump_aggregate_info (compaction_case);
1372 vfm_sink_info.ncases++;
1376 /* Transform the aggregate case buf_1xx, in internal format, to system
1377 file format, in buf64_1xx, and write the resultant case to the
1380 write_case_to_sfm (void)
1382 flt64 *p = buf64_1xx;
1385 for (i = 0; i < agr_dict->nvar; i++)
1387 struct variable *v = agr_dict->var[i];
1389 if (v->type == NUMERIC)
1391 double src = buf_1xx->data[v->fv].f;
1399 memcpy (p, buf_1xx->data[v->fv].s, v->width);
1400 memset (&((char *) p)[v->width], ' ',
1401 REM_RND_UP (v->width, sizeof (flt64)));
1402 p += DIV_RND_UP (v->width, sizeof (flt64));
1406 sfm_write_case (outfile, buf64_1xx, p - buf64_1xx);
1409 /* Aggregate the current case and output it if we passed a
1412 agr_10x_trns_proc (struct trns_header *h unused, struct ccase *c)
1414 int code = aggregate_single_case (c, buf_1xx);
1416 assert (code == -2 || code == -1);
1418 write_case_to_sfm ();
1422 /* Close the system file now that we're done with it. */
1424 agr_10x_trns_free (struct trns_header *h unused)
1426 fh_close_handle (outfile);
1429 /* Ensure that info for the last break group gets written to the
1432 agr_10x_end_func (void)
1434 dump_aggregate_info (buf_1xx);
1435 write_case_to_sfm ();
1438 /* When called with temp_case non-NULL (the normal case), runs the
1439 case through the aggregator and outputs it to the system file if
1440 appropriate. If temp_case is NULL, finishes up writing the last
1441 case if necessary. */
1445 if (temp_case != NULL)
1447 int code = aggregate_single_case (temp_case, buf_1xx);
1449 assert (code == -2 || code == -1);
1451 write_case_to_sfm ();
1457 dump_aggregate_info (buf_1xx);
1458 write_case_to_sfm ();
1460 fh_close_handle (outfile);
1467 /* Print out useful debugging information. */
1469 debug_print (int flags)
1471 printf ("AGGREGATE\n /OUTFILE=%s\n",
1472 outfile ? fh_handle_filename (outfile) : "*");
1474 if (missing == COLUMNWISE)
1475 puts (" /MISSING=COLUMNWISE");
1478 puts (" /DOCUMENT");
1480 puts (" /PRESORTED");
1485 printf (" /BREAK=");
1486 for (i = 0; i < nv_sort; i++)
1487 printf ("%s(%c) ", v_sort[i]->name,
1488 v_sort[i]->p.srt.order == SRT_ASCEND ? 'A' : 'D');
1489 putc ('\n', stdout);
1493 struct agr_var *iter;
1495 for (iter = agr_first; iter; iter = iter->next)
1497 struct agr_func *f = &agr_func_tab[iter->function & FUNC];
1499 printf (" /%s", iter->dest->name);
1500 if (iter->dest->label)
1501 printf ("'%s'", iter->dest->label);
1502 printf ("=%s(%s", f->name, iter->src->name);
1507 for (i = 0; i < f->n_args; i++)
1510 if (iter->src->type == NUMERIC)
1511 printf ("%g", iter->arg[i].f);
1513 printf ("%.*s", iter->src->width, iter->arg[i].c);
1521 #endif /* DEBUGGING */