X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Faggregate.c;h=0d918fb9088d01a588b5e944ef6d8d37c685a550;hb=73549ddf86afb11f488f9f95265f29aab2ef712e;hp=08d2f5e15c7d06176f1787b5e81017c7da54ad83;hpb=b5c82cc9aabe7e641011130240ae1b2e84348e23;p=pspp diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c index 08d2f5e15c..0d918fb908 100644 --- a/src/language/stats/aggregate.c +++ b/src/language/stats/aggregate.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -146,17 +146,18 @@ struct agr_proc struct subcase sort; /* Sort criteria (break variables). */ const struct variable **break_vars; /* Break variables. */ size_t break_var_cnt; /* Number of break variables. */ - struct ccase *break_case; /* Last values of break variables. */ enum missing_treatment missing; /* How to treat missing values. */ struct agr_var *agr_vars; /* First aggregate variable. */ struct dictionary *dict; /* Aggregate dictionary. */ const struct dictionary *src_dict; /* Dict of the source */ int case_cnt; /* Counts aggregated cases. */ + + bool add_variables; /* True iff the aggregated variables should + be appended to the existing dictionary */ }; -static void initialize_aggregate_info (struct agr_proc *, - const struct ccase *); +static void initialize_aggregate_info (struct agr_proc *); static void accumulate_aggregate_info (struct agr_proc *, const struct ccase *); @@ -164,8 +165,9 @@ static void accumulate_aggregate_info (struct agr_proc *, static bool parse_aggregate_functions (struct lexer *, const struct dictionary *, struct agr_proc *); static void agr_destroy (struct agr_proc *); -static void dump_aggregate_info (struct agr_proc *agr, - struct casewriter *output); +static void dump_aggregate_info (const struct agr_proc *agr, + struct casewriter *output, + const struct ccase *break_case); /* Parsing. */ @@ -187,15 +189,11 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) memset(&agr, 0 , sizeof (agr)); agr.missing = ITEMWISE; - agr.break_case = NULL; - - agr.dict = dict_create (); agr.src_dict = dict; subcase_init_empty (&agr.sort); - dict_set_label (agr.dict, dict_get_label (dict)); - dict_set_documents (agr.dict, dict_get_documents (dict)); /* OUTFILE subcommand must be first. */ + lex_match (lexer, '/'); if (!lex_force_match_id (lexer, "OUTFILE")) goto error; lex_match (lexer, '='); @@ -206,6 +204,32 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) goto error; } + if (out_file == NULL && lex_match_id (lexer, "MODE")) + { + lex_match (lexer, '='); + if (lex_match_id (lexer, "ADDVARIABLES")) + { + agr.add_variables = true; + + /* presorted is assumed in ADDVARIABLES mode */ + presorted = true; + } + else if (lex_match_id (lexer, "REPLACE")) + { + agr.add_variables = false; + } + else + goto error; + } + + if ( agr.add_variables ) + agr.dict = dict_clone (dict); + else + agr.dict = dict_create (); + + dict_set_label (agr.dict, dict_get_label (dict)); + dict_set_documents (agr.dict, dict_get_documents (dict)); + /* Read most of the subcommands. */ for (;;) { @@ -235,9 +259,9 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) goto error; agr.break_var_cnt = subcase_get_n_fields (&agr.sort); - for (i = 0; i < agr.break_var_cnt; i++) - dict_clone_var_assert (agr.dict, agr.break_vars[i], - var_get_name (agr.break_vars[i])); + if (! agr.add_variables) + for (i = 0; i < agr.break_var_cnt; i++) + dict_clone_var_assert (agr.dict, agr.break_vars[i]); /* BREAK must follow the options. */ break; @@ -295,18 +319,40 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) casegrouper_get_next_group (grouper, &group); casereader_destroy (group)) { + struct casereader *placeholder = NULL; struct ccase *c = casereader_peek (group, 0); + if (c == NULL) { casereader_destroy (group); continue; } - initialize_aggregate_info (&agr, c); - case_unref (c); - for (; (c = casereader_read (group)) != NULL; case_unref (c)) - accumulate_aggregate_info (&agr, c); - dump_aggregate_info (&agr, output); + initialize_aggregate_info (&agr); + + if ( agr.add_variables ) + placeholder = casereader_clone (group); + + { + struct ccase *cg; + for (; (cg = casereader_read (group)) != NULL; case_unref (cg)) + accumulate_aggregate_info (&agr, cg); + } + + + if (agr.add_variables) + { + struct ccase *cg; + for (; (cg = casereader_read (placeholder)) != NULL; case_unref (cg)) + dump_aggregate_info (&agr, output, cg); + + casereader_destroy (placeholder); + } + else + { + dump_aggregate_info (&agr, output, c); + case_unref (c); + } } if (!casegrouper_destroy (grouper)) goto error; @@ -391,7 +437,8 @@ parse_aggregate_functions (struct lexer *lexer, const struct dictionary *dict, size_t n_dest_prev = n_dest; if (!parse_DATA_LIST_vars (lexer, &dest, &n_dest, - PV_APPEND | PV_SINGLE | PV_NO_SCRATCH)) + (PV_APPEND | PV_SINGLE | PV_NO_SCRATCH + | PV_NO_DUPLICATE))) goto error; /* Assign empty labels. */ @@ -581,7 +628,7 @@ parse_aggregate_functions (struct lexer *lexer, const struct dictionary *dict, } if (function->alpha_type == VAL_STRING) - destvar = dict_clone_var (agr->dict, v->src, dest[i]); + destvar = dict_clone_var_as (agr->dict, v->src, dest[i]); else { assert (var_is_numeric (v->src) @@ -693,7 +740,6 @@ agr_destroy (struct agr_proc *agr) subcase_destroy (&agr->sort); free (agr->break_vars); - case_unref (agr->break_case); for (iter = agr->agr_vars; iter; iter = next) { next = iter->next; @@ -711,8 +757,8 @@ agr_destroy (struct agr_proc *agr) else if (iter->function == SD) moments1_destroy (iter->moments); - var_destroy (iter->subject); - var_destroy (iter->weight); + dict_destroy_internal_var (iter->subject); + dict_destroy_internal_var (iter->weight); free (iter); } @@ -919,23 +965,28 @@ accumulate_aggregate_info (struct agr_proc *agr, const struct ccase *input) /* Writes an aggregated record to OUTPUT. */ static void -dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) +dump_aggregate_info (const struct agr_proc *agr, struct casewriter *output, const struct ccase *break_case) { struct ccase *c = case_create (dict_get_proto (agr->dict)); - { - int value_idx = 0; - int i; + if ( agr->add_variables) + { + case_copy (c, 0, break_case, 0, dict_get_var_cnt (agr->src_dict)); + } + else + { + int value_idx = 0; + int i; - for (i = 0; i < agr->break_var_cnt; i++) - { - const struct variable *v = agr->break_vars[i]; - value_copy (case_data_rw_idx (c, value_idx), - case_data (agr->break_case, v), - var_get_width (v)); - value_idx++; - } - } + for (i = 0; i < agr->break_var_cnt; i++) + { + const struct variable *v = agr->break_vars[i]; + value_copy (case_data_rw_idx (c, value_idx), + case_data (break_case, v), + var_get_width (v)); + value_idx++; + } + } { struct agr_var *i; @@ -965,20 +1016,20 @@ dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) case MEDIAN: { struct casereader *sorted_reader; - struct order_stats *median = percentile_create (0.5, i->cc); + struct percentile *median = percentile_create (0.5, i->cc); + struct order_stats *os = &median->parent; sorted_reader = casewriter_make_reader (i->writer); - order_stats_accumulate (&median, 1, + order_stats_accumulate (&os, 1, sorted_reader, i->weight, i->subject, i->exclude); - v->f = percentile_calculate ((struct percentile *) median, - PC_HAVERAGE); + v->f = percentile_calculate (median, PC_HAVERAGE); - statistic_destroy ((struct statistic *) median); + statistic_destroy (&median->parent.parent); } break; case SD: @@ -1069,13 +1120,10 @@ dump_aggregate_info (struct agr_proc *agr, struct casewriter *output) /* Resets the state for all the aggregate functions. */ static void -initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input) +initialize_aggregate_info (struct agr_proc *agr) { struct agr_var *iter; - case_unref (agr->break_case); - agr->break_case = case_ref (input); - for (iter = agr->agr_vars; iter; iter = iter->next) { iter->saw_missing = false; @@ -1105,10 +1153,10 @@ initialize_aggregate_info (struct agr_proc *agr, const struct ccase *input) proto = caseproto_add_width (proto, 0); if ( ! iter->subject) - iter->subject = var_create_internal (0, 0); + iter->subject = dict_create_internal_var (0, 0); if ( ! iter->weight) - iter->weight = var_create_internal (1, 0); + iter->weight = dict_create_internal_var (1, 0); subcase_init_var (&ordering, iter->subject, SC_ASCEND); iter->writer = sort_create_writer (&ordering, proto);