X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Frank.q;h=d3ee3707a4437112e84059270c2d16af7f11370c;hb=c9d55afdda746d7e8b39e5a117c1644debbb0c75;hp=827e579ca733a1051f673a892c91028c5ac10985;hpb=43b1296aafe7582e7dbe6c2b6a8b478d7d9b0fcf;p=pspp diff --git a/src/language/stats/rank.q b/src/language/stats/rank.q index 827e579ca7..d3ee3707a4 100644 --- a/src/language/stats/rank.q +++ b/src/language/stats/rank.q @@ -19,23 +19,24 @@ #include #include +#include +#include +#include +#include #include #include #include #include +#include +#include #include -#include -#include -#include -#include -#include #include #include #include #include #include -#include #include +#include #include @@ -151,7 +152,7 @@ static enum mv_class exclude_values; static struct rank_spec *rank_specs; static size_t n_rank_specs; -static struct case_ordering *sc; +static struct subcase sc; static const struct variable **group_vars; static size_t n_group_vars; @@ -232,53 +233,59 @@ create_var_label (struct variable *dest_var, static bool -rank_cmd (struct dataset *ds, const struct case_ordering *sc, +rank_cmd (struct dataset *ds, const struct subcase *sc, const struct rank_spec *rank_specs, int n_rank_specs) { - struct case_ordering *base_ordering; + struct dictionary *d = dataset_dict (ds); bool ok = true; int i; - const int n_splits = dict_get_split_cnt (dataset_dict (ds)); - base_ordering = case_ordering_create (dataset_dict (ds)); - for (i = 0; i < n_splits ; i++) - case_ordering_add_var (base_ordering, - dict_get_split_vars (dataset_dict (ds))[i], - SRT_ASCEND); - - for (i = 0; i < n_group_vars; i++) - case_ordering_add_var (base_ordering, group_vars[i], SRT_ASCEND); - for (i = 0 ; i < case_ordering_get_var_cnt (sc) ; ++i ) + for (i = 0 ; i < subcase_get_n_fields (sc) ; ++i ) { - struct case_ordering *ordering; - struct casegrouper *grouper; - struct casereader *group; + /* Rank variable at index I in SC. */ + struct casegrouper *split_grouper; + struct casereader *split_group; struct casewriter *output; - struct casereader *ranked_file; - - ordering = case_ordering_clone (base_ordering); - case_ordering_add_var (ordering, - case_ordering_get_var (sc, i), - case_ordering_get_direction (sc, i)); proc_discard_output (ds); - grouper = casegrouper_create_case_ordering (sort_execute (proc_open (ds), - ordering), - base_ordering); - output = autopaging_writer_create (dict_get_next_value_idx ( - dataset_dict (ds))); - while (casegrouper_get_next_group (grouper, &group)) - rank_sorted_file (group, output, dataset_dict (ds), - rank_specs, n_rank_specs, - i, src_vars[i]); - ok = casegrouper_destroy (grouper); + split_grouper = casegrouper_create_splits (proc_open (ds), d); + output = autopaging_writer_create (dict_get_next_value_idx (d)); + + while (casegrouper_get_next_group (split_grouper, &split_group)) + { + struct subcase ordering; + struct casereader *ordered; + struct casegrouper *by_grouper; + struct casereader *by_group; + + /* Sort this split group by the BY variables as primary + keys and the rank variable as secondary key. */ + subcase_init_vars (&ordering, group_vars, n_group_vars); + subcase_add_var (&ordering, src_vars[i], + subcase_get_direction (sc, i)); + ordered = sort_execute (split_group, &ordering); + subcase_destroy (&ordering); + + /* Rank the rank variable within this split group. */ + by_grouper = casegrouper_create_vars (ordered, + group_vars, n_group_vars); + while (casegrouper_get_next_group (by_grouper, &by_group)) + { + /* Rank the rank variable within this BY group + within the split group. */ + + rank_sorted_file (by_group, output, d, rank_specs, n_rank_specs, + i, src_vars[i]); + } + ok = casegrouper_destroy (by_grouper) && ok; + } + ok = casegrouper_destroy (split_grouper); ok = proc_commit (ds) && ok; - ranked_file = casewriter_make_reader (output); - ok = proc_set_active_file_data (ds, ranked_file) && ok; + ok = (proc_set_active_file_data (ds, casewriter_make_reader (output)) + && ok); if (!ok) break; } - case_ordering_destroy (base_ordering); return ok; } @@ -476,7 +483,7 @@ rank_sorted_file (struct casereader *input, input = casereader_create_filter_missing (input, &rank_var, 1, - exclude_values, output); + exclude_values, NULL, output); input = casereader_create_filter_weight (input, dict, NULL, output); casereader_split (input, &pass1, &pass2); @@ -615,8 +622,7 @@ rank_cleanup(void) rank_specs = NULL; n_rank_specs = 0; - case_ordering_destroy (sc); - sc = NULL; + subcase_destroy (&sc); free (src_vars); src_vars = NULL; @@ -631,6 +637,7 @@ cmd_rank (struct lexer *lexer, struct dataset *ds) size_t i; n_rank_specs = 0; + subcase_init_empty (&sc); if ( !parse_rank (lexer, ds, &cmd, NULL) ) { rank_cleanup (); @@ -650,12 +657,12 @@ cmd_rank (struct lexer *lexer, struct dataset *ds) rank_specs = xmalloc (sizeof (*rank_specs)); rank_specs[0].rfunc = RANK; rank_specs[0].destvars = - xcalloc (case_ordering_get_var_cnt (sc), sizeof (struct variable *)); + xcalloc (subcase_get_n_fields (&sc), sizeof (struct variable *)); n_rank_specs = 1; } - assert ( case_ordering_get_var_cnt (sc) == n_src_vars); + assert ( subcase_get_n_fields (&sc) == n_src_vars); /* Create variables for all rank destinations which haven't already been created with INTO. @@ -763,17 +770,17 @@ cmd_rank (struct lexer *lexer, struct dataset *ds) add_transformation (ds, create_resort_key, 0, order); /* Do the ranking */ - result = rank_cmd (ds, sc, rank_specs, n_rank_specs); + result = rank_cmd (ds, &sc, rank_specs, n_rank_specs); /* Put the active file back in its original order. Delete our sort key, which we don't need anymore. */ { - struct case_ordering *ordering = case_ordering_create (dataset_dict (ds)); struct casereader *sorted; - case_ordering_add_var (ordering, order, SRT_ASCEND); + /* FIXME: loses error conditions. */ + proc_discard_output (ds); - sorted = sort_execute (proc_open (ds), ordering); + sorted = sort_execute_1var (proc_open (ds), order); result = proc_commit (ds) && result; dict_delete_var (dataset_dict (ds), order); @@ -798,10 +805,9 @@ rank_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_rank && lex_token (lexer) != T_ALL) return 2; - sc = parse_case_ordering (lexer, dataset_dict (ds), NULL); - if (sc == NULL) + if (!parse_sort_criteria (lexer, dataset_dict (ds), &sc, &src_vars, NULL)) return 0; - case_ordering_get_vars (sc, &src_vars, &n_src_vars); + n_src_vars = subcase_get_n_fields (&sc); if ( lex_match (lexer, T_BY) ) { @@ -835,8 +841,7 @@ parse_rank_function (struct lexer *lexer, struct dictionary *dict, struct cmd_ra rank_specs[n_rank_specs - 1].destvars = NULL; rank_specs[n_rank_specs - 1].destvars = - xcalloc (case_ordering_get_var_cnt (sc), - sizeof (struct variable *)); + xcalloc (subcase_get_n_fields (&sc), sizeof (struct variable *)); if (lex_match_id (lexer, "INTO")) { @@ -850,7 +855,7 @@ parse_rank_function (struct lexer *lexer, struct dictionary *dict, struct cmd_ra msg(SE, _("Variable %s already exists."), lex_tokid (lexer)); return 0; } - if ( var_count >= case_ordering_get_var_cnt (sc) ) + if ( var_count >= subcase_get_n_fields (&sc) ) { msg(SE, _("Too many variables in INTO clause.")); return 0;