@display
AGGREGATE
- OUTFILE=@{*,'file-name',file_handle@}
+ OUTFILE=@{*,'file-name',file_handle@} [MODE=@{REPLACE, ADDVARIABLES@}]
/PRESORTED
/DOCUMENT
/MISSING=COLUMNWISE
system file, portable file, or scratch file by file name or file
handle (@pxref{File Handles}).
The aggregated cases are written to this file. If @samp{*} is
-specified, then the aggregated cases replace the active file. Use of
-OUTFILE to write a portable file or scratch file is a PSPP extension.
+specified, then the aggregated cases replace the active file.
+Use of OUTFILE to write a portable file or scratch file is a PSPP extension.
+
+If OUTFILE=@samp{*} is given, then the subcommand MODE may also be
+specified.
+The MODE subcommand has two possible values: ADDVARIABLES or REPLACE.
+In REPLACE mode, the entire active file is replaced by a new file
+which contains just the break variables and the destination variables.
+In this mode, the new file will contain as many cases as there are
+unique combinations of the break variables.
+In ADDVARIABLES mode, the destination variables will be appended to
+the existing active file.
+Cases which have identical combinations of values in their break
+variables will receive identical values for the destination variables.
+The number of cases in the active file will remain unchanged.
+Note that if ADDVARIABLES is specified, then the data @emph{must} be
+sorted on the break variables.
By default, the active file will be sorted based on the break variables
before aggregation takes place. If the active file is already sorted
or otherwise grouped in terms of the break variables, specify
PRESORTED to save time.
+PRESORTED is assumed if MODE=ADDVARIABLES is used.
Specify DOCUMENT to copy the documents from the active file into the
aggregate file (@pxref{DOCUMENT}). Otherwise, the aggregate file will
struct subcase sort; /* Sort criteria (break variables). */
const struct variable **break_vars; /* Break variables. */
size_t break_var_cnt; /* Number of break variables. */
- struct ccase *break_case; /* Last values of break variables. */
enum missing_treatment missing; /* How to treat missing values. */
struct agr_var *agr_vars; /* First aggregate variable. */
struct dictionary *dict; /* Aggregate dictionary. */
const struct dictionary *src_dict; /* Dict of the source */
int case_cnt; /* Counts aggregated cases. */
+
+ bool add_variables; /* True iff the aggregated variables should
+ be appended to the existing dictionary */
};
-static void initialize_aggregate_info (struct agr_proc *,
- const struct ccase *);
+static void initialize_aggregate_info (struct agr_proc *);
static void accumulate_aggregate_info (struct agr_proc *,
const struct ccase *);
static bool parse_aggregate_functions (struct lexer *, const struct dictionary *,
struct agr_proc *);
static void agr_destroy (struct agr_proc *);
-static void dump_aggregate_info (struct agr_proc *agr,
- struct casewriter *output);
+static void dump_aggregate_info (const struct agr_proc *agr,
+ struct casewriter *output,
+ const struct ccase *break_case);
\f
/* Parsing. */
memset(&agr, 0 , sizeof (agr));
agr.missing = ITEMWISE;
- agr.break_case = NULL;
-
- agr.dict = dict_create ();
agr.src_dict = dict;
subcase_init_empty (&agr.sort);
- dict_set_label (agr.dict, dict_get_label (dict));
- dict_set_documents (agr.dict, dict_get_documents (dict));
/* OUTFILE subcommand must be first. */
lex_match (lexer, '/');
goto error;
}
+ if (out_file == NULL && lex_match_id (lexer, "MODE"))
+ {
+ lex_match (lexer, '=');
+ if (lex_match_id (lexer, "ADDVARIABLES"))
+ {
+ agr.add_variables = true;
+
+ /* presorted is assumed in ADDVARIABLES mode */
+ presorted = true;
+ }
+ else if (lex_match_id (lexer, "REPLACE"))
+ {
+ agr.add_variables = false;
+ }
+ else
+ goto error;
+ }
+
+ if ( agr.add_variables )
+ agr.dict = dict_clone (dict);
+ else
+ agr.dict = dict_create ();
+
+ dict_set_label (agr.dict, dict_get_label (dict));
+ dict_set_documents (agr.dict, dict_get_documents (dict));
+
/* Read most of the subcommands. */
for (;;)
{
goto error;
agr.break_var_cnt = subcase_get_n_fields (&agr.sort);
- for (i = 0; i < agr.break_var_cnt; i++)
- dict_clone_var_assert (agr.dict, agr.break_vars[i]);
+ if (! agr.add_variables)
+ for (i = 0; i < agr.break_var_cnt; i++)
+ dict_clone_var_assert (agr.dict, agr.break_vars[i]);
/* BREAK must follow the options. */
break;
casegrouper_get_next_group (grouper, &group);
casereader_destroy (group))
{
+ struct casereader *placeholder = NULL;
struct ccase *c = casereader_peek (group, 0);
+
if (c == NULL)
{
casereader_destroy (group);
continue;
}
- initialize_aggregate_info (&agr, c);
- case_unref (c);
- for (; (c = casereader_read (group)) != NULL; case_unref (c))
- accumulate_aggregate_info (&agr, c);
- dump_aggregate_info (&agr, output);
+ initialize_aggregate_info (&agr);
+
+ if ( agr.add_variables )
+ placeholder = casereader_clone (group);
+
+ {
+ struct ccase *cg;
+ for (; (cg = casereader_read (group)) != NULL; case_unref (cg))
+ accumulate_aggregate_info (&agr, cg);
+ }
+
+
+ if (agr.add_variables)
+ {
+ struct ccase *cg;
+ for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg))
+ dump_aggregate_info (&agr, output, cg);
+
+ casereader_destroy (placeholder);
+ }
+ else
+ {
+ dump_aggregate_info (&agr, output, c);
+ case_unref (c);
+ }
}
if (!casegrouper_destroy (grouper))
goto error;
subcase_destroy (&agr->sort);
free (agr->break_vars);
- case_unref (agr->break_case);
for (iter = agr->agr_vars; iter; iter = next)
{
next = iter->next;
/* Writes an aggregated record to OUTPUT. */
static void
-dump_aggregate_info (struct agr_proc *agr, struct casewriter *output)
+dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output, const struct ccase *break_case)
{
struct ccase *c = case_create (dict_get_proto (agr->dict));
- {
- int value_idx = 0;
- int i;
+ if ( agr->add_variables)
+ {
+ case_copy (c, 0, break_case, 0, dict_get_var_cnt (agr->src_dict));
+ }
+ else
+ {
+ int value_idx = 0;
+ int i;
- for (i = 0; i < agr->break_var_cnt; i++)
- {
- const struct variable *v = agr->break_vars[i];
- value_copy (case_data_rw_idx (c, value_idx),
- case_data (agr->break_case, v),
- var_get_width (v));
- value_idx++;
- }
- }
+ for (i = 0; i < agr->break_var_cnt; i++)
+ {
+ const struct variable *v = agr->break_vars[i];
+ value_copy (case_data_rw_idx (c, value_idx),
+ case_data (break_case, v),
+ var_get_width (v));
+ value_idx++;
+ }
+ }
{
struct agr_var *i;
/* Resets the state for all the aggregate functions. */
static void
-initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input)
+initialize_aggregate_info (struct agr_proc *agr)
{
struct agr_var *iter;
- case_unref (agr->break_case);
- agr->break_case = case_ref (input);
-
for (iter = agr->agr_vars; iter; iter = iter->next)
{
iter->saw_missing = false;
])
AT_CHECK([pspp -O format=csv aggregate.sps], [1], [ignore], [])
AT_CLEANUP
+
+
+AT_SETUP([AGGREGATE mode=addvariables])
+AT_DATA([addvariables.sps],
+ [data list notable list /x * cn * y *.
+begin data.
+1 1 2
+3 2 3
+3 3 4
+5 4 6
+7 5 8
+7 6 9
+7 7 10
+9 8 11
+end data.
+
+aggregate outfile=* mode=addvariables
+ /break = x
+ /sum = sum(y)
+ /mean = mean (y).
+
+list.
+])
+
+AT_CHECK([pspp -O format=csv addvariables.sps], [0],
+ [Table: Data List
+x,cn,y,sum,mean
+1.00,1.00,2.00,2.00,2.00
+3.00,2.00,3.00,7.00,3.50
+3.00,3.00,4.00,7.00,3.50
+5.00,4.00,6.00,6.00,6.00
+7.00,5.00,8.00,27.00,9.00
+7.00,6.00,9.00,27.00,9.00
+7.00,7.00,10.00,27.00,9.00
+9.00,8.00,11.00,11.00,11.00
+])
+
+AT_CLEANUP