X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Frank.q;h=c225370e459269cc572430f87e533f7a591662c4;hb=2cf38ce51a9f34961d68a75e0b312a591b5c9abf;hp=7e882ab0ce31843057143e9691cc8dcd428f793a;hpb=f43378497b8400e9c22a3485c534693dc1bc9554;p=pspp-builds.git diff --git a/src/language/stats/rank.q b/src/language/stats/rank.q index 7e882ab0..c225370e 100644 --- a/src/language/stats/rank.q +++ b/src/language/stats/rank.q @@ -1,45 +1,44 @@ -/* PSPP - RANK. -*-c-*- +/* PSPP - a program for statistical analysis. + Copyright (C) 2005, 2006, 2007, 2009 Free Software Foundation, Inc. -Copyright (C) 2005, 2006 Free Software Foundation, Inc. -Author: John Darrington , - Ben Pfaff . + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. */ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ #include -#include "sort-criteria.h" +#include +#include +#include +#include +#include +#include #include +#include +#include #include +#include +#include #include -#include -#include -#include -#include #include #include -#include #include +#include #include -#include #include +#include #include -#include #include "gettext.h" #define _(msgid) gettext (msgid) @@ -65,31 +64,31 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA /* (declarations) */ /* (functions) */ -typedef double (*rank_function_t) (double c, double cc, double cc_1, +typedef double (*rank_function_t) (double c, double cc, double cc_1, int i, double w); -static double rank_proportion (double c, double cc, double cc_1, +static double rank_proportion (double c, double cc, double cc_1, int i, double w); -static double rank_normal (double c, double cc, double cc_1, +static double rank_normal (double c, double cc, double cc_1, int i, double w); -static double rank_percent (double c, double cc, double cc_1, +static double rank_percent (double c, double cc, double cc_1, int i, double w); -static double rank_rfraction (double c, double cc, double cc_1, +static double rank_rfraction (double c, double cc, double cc_1, int i, double w); -static double rank_rank (double c, double cc, double cc_1, +static double rank_rank (double c, double cc, double cc_1, int i, double w); -static double rank_n (double c, double cc, double cc_1, +static double rank_n (double c, double cc, double cc_1, int i, double w); -static double rank_savage (double c, double cc, double cc_1, +static double rank_savage (double c, double cc, double cc_1, int i, double w); -static double rank_ntiles (double c, double cc, double cc_1, +static double rank_ntiles (double c, double cc, double cc_1, int i, double w); @@ -136,7 +135,7 @@ static const rank_function_t rank_func[n_RANK_FUNCS] = { rank_proportion, rank_n, rank_ntiles, - rank_savage + rank_savage }; @@ -147,18 +146,18 @@ struct rank_spec }; -/* Function to use for testing for missing values */ -static is_missing_func *value_is_missing; +/* Categories of missing values to exclude. */ +static enum mv_class exclude_values; static struct rank_spec *rank_specs; static size_t n_rank_specs; -static struct sort_criteria *sc; +static struct subcase sc; -static struct variable **group_vars; +static const struct variable **group_vars; static size_t n_group_vars; -static struct variable **src_vars; +static const struct variable **src_vars; static size_t n_src_vars; @@ -166,19 +165,19 @@ static int k_ntiles; static struct cmd_rank cmd; -static struct casefile *rank_sorted_casefile (struct casefile *cf, - const struct sort_criteria *, - const struct dictionary *, - const struct rank_spec *rs, - int n_rank_specs, - int idx, - const struct missing_values *miss - ); +static void rank_sorted_file (struct casereader *, + struct casewriter *, + const struct dictionary *, + const struct rank_spec *rs, + int n_rank_specs, + int idx, + const struct variable *rank_var); + static const char * fraction_name(void) { static char name[10]; - switch ( cmd.fraction ) + switch ( cmd.fraction ) { case RANK_BLOM: strcpy (name, "BLOM"); @@ -200,118 +199,113 @@ fraction_name(void) /* Create a label on DEST_VAR, describing its derivation from SRC_VAR and F */ static void -create_var_label (struct variable *dest_var, +create_var_label (struct variable *dest_var, const struct variable *src_var, enum RANK_FUNC f) { struct string label; ds_init_empty (&label); - if ( n_group_vars > 0 ) + if ( n_group_vars > 0 ) { struct string group_var_str; int g; ds_init_empty (&group_var_str); - for (g = 0 ; g < n_group_vars ; ++g ) + for (g = 0 ; g < n_group_vars ; ++g ) { if ( g > 0 ) ds_put_cstr (&group_var_str, " "); - ds_put_cstr (&group_var_str, group_vars[g]->name); + ds_put_cstr (&group_var_str, var_get_name (group_vars[g])); } - ds_put_format (&label, _("%s of %s by %s"), function_name[f], - src_var->name, ds_cstr (&group_var_str)); + ds_put_format (&label, _("%s of %s by %s"), function_name[f], + var_get_name (src_var), ds_cstr (&group_var_str)); ds_destroy (&group_var_str); } else - ds_put_format (&label,_("%s of %s"), function_name[f], src_var->name); + ds_put_format (&label, _("%s of %s"), + function_name[f], var_get_name (src_var)); - dest_var->label = strdup (ds_cstr (&label) ); + var_set_label (dest_var, ds_cstr (&label)); ds_destroy (&label); } -static bool -rank_cmd (struct dataset *ds, const struct sort_criteria *sc, +static bool +rank_cmd (struct dataset *ds, const struct subcase *sc, const struct rank_spec *rank_specs, int n_rank_specs) { - struct sort_criteria criteria; - bool result = true; + struct dictionary *d = dataset_dict (ds); + bool ok = true; int i; - const int n_splits = dict_get_split_cnt (dataset_dict (ds)); - criteria.crit_cnt = n_splits + n_group_vars + 1; - criteria.crits = xnmalloc (criteria.crit_cnt, sizeof *criteria.crits); - for (i = 0; i < n_splits ; i++) + for (i = 0 ; i < subcase_get_n_fields (sc) ; ++i ) { - struct variable *v = dict_get_split_vars (dataset_dict (ds))[i]; - criteria.crits[i].fv = v->fv; - criteria.crits[i].width = v->width; - criteria.crits[i].dir = SRT_ASCEND; - } - for (i = 0; i < n_group_vars; i++) - { - criteria.crits[i + n_splits].fv = group_vars[i]->fv; - criteria.crits[i + n_splits].width = group_vars[i]->width; - criteria.crits[i + n_splits].dir = SRT_ASCEND; - } - for (i = 0 ; i < sc->crit_cnt ; ++i ) - { - struct casefile *out ; - struct casefile *cf ; - struct casereader *reader ; - struct casefile *sorted_cf ; - - /* Obtain active file in CF. */ - if (!procedure (ds, NULL, NULL)) - goto error; - - cf = proc_capture_output (ds); - - /* Sort CF into SORTED_CF. */ - reader = casefile_get_destructive_reader (cf) ; - criteria.crits[criteria.crit_cnt - 1] = sc->crits[i]; - assert ( sc->crits[i].fv == src_vars[i]->fv ); - sorted_cf = sort_execute (reader, &criteria); - casefile_destroy (cf); - - out = rank_sorted_casefile (sorted_cf, &criteria, - dataset_dict (ds), - rank_specs, n_rank_specs, - i, &src_vars[i]->miss) ; - if ( NULL == out ) - { - result = false ; - continue ; - } - - proc_set_source (ds, storage_source_create (out)); - } + /* Rank variable at index I in SC. */ + struct casegrouper *split_grouper; + struct casereader *split_group; + struct casewriter *output; - free (criteria.crits); - return result ; + proc_discard_output (ds); + split_grouper = casegrouper_create_splits (proc_open (ds), d); + output = autopaging_writer_create (dict_get_proto (d)); + + while (casegrouper_get_next_group (split_grouper, &split_group)) + { + struct subcase ordering; + struct casereader *ordered; + struct casegrouper *by_grouper; + struct casereader *by_group; + + /* Sort this split group by the BY variables as primary + keys and the rank variable as secondary key. */ + subcase_init_vars (&ordering, group_vars, n_group_vars); + subcase_add_var (&ordering, src_vars[i], + subcase_get_direction (sc, i)); + ordered = sort_execute (split_group, &ordering); + subcase_destroy (&ordering); + + /* Rank the rank variable within this split group. */ + by_grouper = casegrouper_create_vars (ordered, + group_vars, n_group_vars); + while (casegrouper_get_next_group (by_grouper, &by_group)) + { + /* Rank the rank variable within this BY group + within the split group. */ -error: - free (criteria.crits); - return false ; + rank_sorted_file (by_group, output, d, rank_specs, n_rank_specs, + i, src_vars[i]); + } + ok = casegrouper_destroy (by_grouper) && ok; + } + ok = casegrouper_destroy (split_grouper); + ok = proc_commit (ds) && ok; + ok = (proc_set_active_file_data (ds, casewriter_make_reader (output)) + && ok); + if (!ok) + break; + } + + return ok; } /* Hardly a rank function !! */ -static double -rank_n (double c UNUSED, double cc UNUSED, double cc_1 UNUSED, +static double +rank_n (double c UNUSED, double cc UNUSED, double cc_1 UNUSED, int i UNUSED, double w) { return w; } -static double -rank_rank (double c, double cc, double cc_1, +static double +rank_rank (double c, double cc, double cc_1, int i, double w UNUSED) { double rank; - if ( c >= 1.0 ) + + if ( c >= 1.0 ) { switch (cmd.ties) { @@ -356,31 +350,31 @@ rank_rank (double c, double cc, double cc_1, } -static double -rank_rfraction (double c, double cc, double cc_1, +static double +rank_rfraction (double c, double cc, double cc_1, int i, double w) { return rank_rank (c, cc, cc_1, i, w) / w ; } -static double -rank_percent (double c, double cc, double cc_1, +static double +rank_percent (double c, double cc, double cc_1, int i, double w) { return rank_rank (c, cc, cc_1, i, w) * 100.0 / w ; } -static double -rank_proportion (double c, double cc, double cc_1, +static double +rank_proportion (double c, double cc, double cc_1, int i, double w) { const double r = rank_rank (c, cc, cc_1, i, w) ; double f; - - switch ( cmd.fraction ) + + switch ( cmd.fraction ) { case RANK_BLOM: f = (r - 3.0/8.0) / (w + 0.25); @@ -402,20 +396,20 @@ rank_proportion (double c, double cc, double cc_1, return (f > 0) ? f : SYSMIS; } -static double -rank_normal (double c, double cc, double cc_1, +static double +rank_normal (double c, double cc, double cc_1, int i, double w) { double f = rank_proportion (c, cc, cc_1, i, w); - + return gsl_cdf_ugaussian_Pinv (f); } -static double -rank_ntiles (double c, double cc, double cc_1, +static double +rank_ntiles (double c, double cc, double cc_1, int i, double w) { - double r = rank_rank (c, cc, cc_1, i, w); + double r = rank_rank (c, cc, cc_1, i, w); return ( floor (( r * k_ntiles) / ( w + 1) ) + 1); @@ -427,16 +421,16 @@ ee (int j, double w_star) { int k; double sum = 0.0; - - for (k = 1 ; k <= j; k++) + + for (k = 1 ; k <= j; k++) sum += 1.0 / ( w_star + 1 - k ); return sum; } -static double -rank_savage (double c, double cc, double cc_1, +static double +rank_savage (double c, double cc, double cc_1, int i UNUSED, double w) { double int_part; @@ -452,14 +446,14 @@ rank_savage (double c, double cc, double cc_1, Therefore, evaluate the second, only when the first is non-zero */ const double expr1 = (1 - g_1) ? (1 - g_1) * ee(i_1+1, w_star) : ( 1 - g_1); const double expr2 = g_2 ? g_2 * ee (i_2+1, w_star) : g_2 ; - - if ( i_1 == i_2 ) + + if ( i_1 == i_2 ) return ee (i_1 + 1, w_star) - 1; - + if ( i_1 + 1 == i_2 ) return ( ( expr1 + expr2 )/c ) - 1; - if ( i_1 + 2 <= i_2 ) + if ( i_1 + 2 <= i_2 ) { int j; double sigma = 0.0; @@ -471,197 +465,81 @@ rank_savage (double c, double cc, double cc_1, NOT_REACHED(); } - -/* Rank the casefile belonging to CR, starting from the current - postition of CR continuing up to and including the ENDth case. - - RS points to an array containing the rank specifications to - use. N_RANK_SPECS is the number of elements of RS. - - - DEST_VAR_INDEX is the index into the rank_spec destvar element - to be used for this ranking. - - Prerequisites: 1. The casefile must be sorted according to CRITERION. - 2. W is the sum of the non-missing caseweights for this - range of the casefile. -*/ static void -rank_cases (struct casereader *cr, - unsigned long end, - const struct dictionary *dict, - const struct sort_criterion *criterion, - const struct missing_values *mv, - double w, - const struct rank_spec *rs, - int n_rank_specs, - int dest_var_index, - struct casefile *dest) +rank_sorted_file (struct casereader *input, + struct casewriter *output, + const struct dictionary *dict, + const struct rank_spec *rs, + int n_rank_specs, + int dest_idx, + const struct variable *rank_var) { - bool warn = true; + struct casereader *pass1, *pass2, *pass2_1; + struct casegrouper *tie_grouper; + struct ccase *c; + double w = 0.0; double cc = 0.0; - double cc_1; - int iter = 1; - - const int fv = criterion->fv; - const int width = criterion->width; - - while (casereader_cnum (cr) < end) - { - struct casereader *lookahead; - const union value *this_value; - struct ccase this_case, lookahead_case; - double c; - int i; - size_t n = 0; - - if (!casereader_read_xfer (cr, &this_case)) - break; - - this_value = case_data (&this_case, fv); - c = dict_get_case_weight (dict, &this_case, &warn); - - lookahead = casereader_clone (cr); - n = 0; - while (casereader_cnum (lookahead) < end - && casereader_read_xfer (lookahead, &lookahead_case)) - { - const union value *lookahead_value = case_data (&lookahead_case, fv); - int diff = compare_values (this_value, lookahead_value, width); - - if (diff != 0) - { - /* Make sure the casefile was sorted */ - assert ( diff == ((criterion->dir == SRT_ASCEND) ? -1 :1)); - - case_destroy (&lookahead_case); - break; - } + int tie_group = 1; - c += dict_get_case_weight (dict, &lookahead_case, &warn); - case_destroy (&lookahead_case); - n++; - } - casereader_destroy (lookahead); - - cc_1 = cc; - if ( !value_is_missing (mv, this_value) ) - cc += c; - - do - { - for (i = 0; i < n_rank_specs; ++i) - { - const int dest_idx = rs[i].destvars[dest_var_index]->fv; - if ( value_is_missing (mv, this_value) ) - case_data_rw (&this_case, dest_idx)->f = SYSMIS; - else - case_data_rw (&this_case, dest_idx)->f = - rank_func[rs[i].rfunc](c, cc, cc_1, iter, w); - } - casefile_append_xfer (dest, &this_case); - } - while (n-- > 0 && casereader_read_xfer (cr, &this_case)); + input = casereader_create_filter_missing (input, &rank_var, 1, + exclude_values, NULL, output); + input = casereader_create_filter_weight (input, dict, NULL, output); - if ( !value_is_missing (mv, this_value) ) - iter++; - } + casereader_split (input, &pass1, &pass2); - /* If this isn't true, then all the results will be wrong */ - assert ( w == cc ); -} + /* Pass 1: Get total group weight. */ + for (; (c = casereader_read (pass1)) != NULL; case_unref (c)) + w += dict_get_case_weight (dict, c, NULL); + casereader_destroy (pass1); -static bool -same_group (const struct ccase *a, const struct ccase *b, - const struct sort_criteria *crit) -{ - size_t i; - - for (i = 0; i < crit->crit_cnt - 1; i++) + /* Pass 2: Do ranking. */ + tie_grouper = casegrouper_create_vars (pass2, &rank_var, 1); + while (casegrouper_get_next_group (tie_grouper, &pass2_1)) { - struct sort_criterion *c = &crit->crits[i]; - if (compare_values (case_data (a, c->fv), case_data (b, c->fv), - c->width) != 0) - return false; - } - - return true; -} - -static struct casefile * -rank_sorted_casefile (struct casefile *cf, - const struct sort_criteria *crit, - const struct dictionary *dict, - const struct rank_spec *rs, - int n_rank_specs, - int dest_idx, - const struct missing_values *mv) -{ - struct casefile *dest = fastfile_create (casefile_get_value_cnt (cf)); - struct casereader *lookahead = casefile_get_reader (cf, NULL); - struct casereader *pos = casereader_clone (lookahead); - struct ccase group_case; - bool warn = true; - - struct sort_criterion *ultimate_crit = &crit->crits[crit->crit_cnt - 1]; + struct casereader *pass2_2; + double cc_1 = cc; + double tw = 0.0; + int i; - if (casereader_read (lookahead, &group_case)) - { - struct ccase this_case; - const union value *this_value ; - double w = 0.0; - this_value = case_data( &group_case, ultimate_crit->fv); + pass2_2 = casereader_clone (pass2_1); + taint_propagate (casereader_get_taint (pass2_2), + casewriter_get_taint (output)); - if ( !value_is_missing(mv, this_value) ) - w = dict_get_case_weight (dict, &group_case, &warn); + /* Pass 2.1: Sum up weight for tied cases. */ + for (; (c = casereader_read (pass2_1)) != NULL; case_unref (c)) + tw += dict_get_case_weight (dict, c, NULL); + cc += tw; + casereader_destroy (pass2_1); - while (casereader_read (lookahead, &this_case)) + /* Pass 2.2: Rank tied cases. */ + while ((c = casereader_read (pass2_2)) != NULL) { - const union value *this_value = - case_data(&this_case, ultimate_crit->fv); - double c = dict_get_case_weight (dict, &this_case, &warn); - if (!same_group (&group_case, &this_case, crit)) + c = case_unshare (c); + for (i = 0; i < n_rank_specs; ++i) { - rank_cases (pos, casereader_cnum (lookahead) - 1, - dict, - ultimate_crit, - mv, w, - rs, n_rank_specs, - dest_idx, dest); - - w = 0.0; - case_destroy (&group_case); - case_move (&group_case, &this_case); + const struct variable *dst_var = rs[i].destvars[dest_idx]; + double *dst_value = &case_data_rw (c, dst_var)->f; + *dst_value = rank_func[rs[i].rfunc] (tw, cc, cc_1, tie_group, w); } - if ( !value_is_missing (mv, this_value) ) - w += c; - case_destroy (&this_case); + casewriter_write (output, c); } - case_destroy (&group_case); - rank_cases (pos, ULONG_MAX, dict, ultimate_crit, mv, w, - rs, n_rank_specs, dest_idx, dest); - } + casereader_destroy (pass2_2); - if (casefile_error (dest)) - { - casefile_destroy (dest); - dest = NULL; + tie_group++; } - - casefile_destroy (cf); - return dest; + casegrouper_destroy (tie_grouper); } - /* Transformation function to enumerate all the cases */ -static int -create_resort_key (void *key_var_, struct ccase *cc, casenumber case_num) +static int +create_resort_key (void *key_var_, struct ccase **cc, casenumber case_num) { struct variable *key_var = key_var_; - case_data_rw(cc, key_var->fv)->f = case_num; - + *cc = case_unshare (*cc); + case_data_rw (*cc, key_var)->f = case_num; + return TRNS_CONTINUE; } @@ -672,22 +550,22 @@ create_resort_key (void *key_var_, struct ccase *cc, casenumber case_num) If VNAME is NULL, then a name will be automatically chosen. */ static struct variable * -create_rank_variable (struct dictionary *dict, enum RANK_FUNC f, - const struct variable *src_var, +create_rank_variable (struct dictionary *dict, enum RANK_FUNC f, + const struct variable *src_var, const char *vname) { int i; - struct variable *var = NULL; + struct variable *var = NULL; char name[SHORT_NAME_LEN + 1]; - if ( vname ) + if ( vname ) var = dict_create_var(dict, vname, 0); if ( NULL == var ) { - snprintf(name, SHORT_NAME_LEN + 1, "%c%s", - function_name[f][0], src_var->name); - + snprintf (name, SHORT_NAME_LEN + 1, "%c%s", + function_name[f][0], var_get_name (src_var)); + var = dict_create_var(dict, name, 0); } @@ -696,11 +574,11 @@ create_rank_variable (struct dictionary *dict, enum RANK_FUNC f, { char func_abb[4]; snprintf(func_abb, 4, "%s", function_name[f]); - snprintf(name, SHORT_NAME_LEN + 1, "%s%03d", func_abb, + snprintf(name, SHORT_NAME_LEN + 1, "%s%03d", func_abb, i); var = dict_create_var(dict, name, 0); - if (i++ >= 999) + if (i++ >= 999) break; } @@ -710,26 +588,25 @@ create_rank_variable (struct dictionary *dict, enum RANK_FUNC f, char func_abb[3]; snprintf(func_abb, 3, "%s", function_name[f]); - snprintf(name, SHORT_NAME_LEN + 1, + snprintf(name, SHORT_NAME_LEN + 1, "RNK%s%02d", func_abb, i); var = dict_create_var(dict, name, 0); - if ( i++ >= 99 ) + if ( i++ >= 99 ) break; } - - if ( NULL == var ) + + if ( NULL == var ) { msg(ME, _("Cannot create new rank variable. All candidates in use.")); return NULL; } - var->write = var->print = dest_format[f]; + var_set_both_formats (var, &dest_format[f]); return var; } -int cmd_rank(struct dataset *ds); static void rank_cleanup(void) @@ -739,18 +616,15 @@ rank_cleanup(void) free (group_vars); group_vars = NULL; n_group_vars = 0; - + for (i = 0 ; i < n_rank_specs ; ++i ) - { free (rank_specs[i].destvars); - } - + free (rank_specs); rank_specs = NULL; n_rank_specs = 0; - sort_destroy_criteria (sc); - sc = NULL; + subcase_destroy (&sc); free (src_vars); src_vars = NULL; @@ -758,42 +632,39 @@ rank_cleanup(void) } int -cmd_rank (struct dataset *ds) +cmd_rank (struct lexer *lexer, struct dataset *ds) { bool result; struct variable *order; size_t i; n_rank_specs = 0; - if ( !parse_rank (ds, &cmd, NULL) ) + subcase_init_empty (&sc); + if ( !parse_rank (lexer, ds, &cmd, NULL) ) { rank_cleanup (); return CMD_FAILURE; } /* If /MISSING=INCLUDE is set, then user missing values are ignored */ - if (cmd.miss == RANK_INCLUDE ) - value_is_missing = mv_is_value_system_missing; - else - value_is_missing = mv_is_value_missing; - + exclude_values = cmd.miss == RANK_INCLUDE ? MV_SYSTEM : MV_ANY; /* Default to /RANK if no function subcommands are given */ - if ( !( cmd.sbc_normal || cmd.sbc_ntiles || cmd.sbc_proportion || - cmd.sbc_rfraction || cmd.sbc_savage || cmd.sbc_n || + if ( !( cmd.sbc_normal || cmd.sbc_ntiles || cmd.sbc_proportion || + cmd.sbc_rfraction || cmd.sbc_savage || cmd.sbc_n || cmd.sbc_percent || cmd.sbc_rank ) ) { assert ( n_rank_specs == 0 ); - + rank_specs = xmalloc (sizeof (*rank_specs)); rank_specs[0].rfunc = RANK; - rank_specs[0].destvars = - xcalloc (sc->crit_cnt, sizeof (struct variable *)); + rank_specs[0].destvars = + xcalloc (subcase_get_n_fields (&sc), sizeof (struct variable *)); n_rank_specs = 1; } - assert ( sc->crit_cnt == n_src_vars); + assert ( subcase_get_n_fields (&sc) == n_src_vars); /* Create variables for all rank destinations which haven't already been created with INTO. @@ -802,30 +673,30 @@ cmd_rank (struct dataset *ds) for (i = 0 ; i < n_rank_specs ; ++i ) { int v; - for ( v = 0 ; v < n_src_vars ; v ++ ) + for ( v = 0 ; v < n_src_vars ; v ++ ) { - if ( rank_specs[i].destvars[v] == NULL ) + if ( rank_specs[i].destvars[v] == NULL ) { - rank_specs[i].destvars[v] = + rank_specs[i].destvars[v] = create_rank_variable (dataset_dict(ds), rank_specs[i].rfunc, src_vars[v], NULL); } - + create_var_label ( rank_specs[i].destvars[v], src_vars[v], rank_specs[i].rfunc); } } - if ( cmd.print == RANK_YES ) + if ( cmd.print == RANK_YES ) { int v; tab_output_text (0, _("Variables Created By RANK")); tab_output_text (0, "\n"); - + for (i = 0 ; i < n_rank_specs ; ++i ) { - for ( v = 0 ; v < n_src_vars ; v ++ ) + for ( v = 0 ; v < n_src_vars ; v ++ ) { if ( n_group_vars > 0 ) { @@ -833,64 +704,60 @@ cmd_rank (struct dataset *ds) int g; ds_init_empty (&varlist); - for ( g = 0 ; g < n_group_vars ; ++g ) + for ( g = 0 ; g < n_group_vars ; ++g ) { - ds_put_cstr (&varlist, group_vars[g]->name); + ds_put_cstr (&varlist, var_get_name (group_vars[g])); if ( g < n_group_vars - 1) ds_put_cstr (&varlist, " "); } - if ( rank_specs[i].rfunc == NORMAL || - rank_specs[i].rfunc == PROPORTION ) - tab_output_text (TAT_PRINTF, - _("%s into %s(%s of %s using %s BY %s)"), - src_vars[v]->name, - rank_specs[i].destvars[v]->name, - function_name[rank_specs[i].rfunc], - src_vars[v]->name, - fraction_name(), - ds_cstr (&varlist) - ); - + if ( rank_specs[i].rfunc == NORMAL || + rank_specs[i].rfunc == PROPORTION ) + tab_output_text_format (0, + _("%s into %s(%s of %s using %s BY %s)"), + var_get_name (src_vars[v]), + var_get_name (rank_specs[i].destvars[v]), + function_name[rank_specs[i].rfunc], + var_get_name (src_vars[v]), + fraction_name(), + ds_cstr (&varlist)); + else - tab_output_text (TAT_PRINTF, - _("%s into %s(%s of %s BY %s)"), - src_vars[v]->name, - rank_specs[i].destvars[v]->name, - function_name[rank_specs[i].rfunc], - src_vars[v]->name, - ds_cstr (&varlist) - ); + tab_output_text_format (0, + _("%s into %s(%s of %s BY %s)"), + var_get_name (src_vars[v]), + var_get_name (rank_specs[i].destvars[v]), + function_name[rank_specs[i].rfunc], + var_get_name (src_vars[v]), + ds_cstr (&varlist)); ds_destroy (&varlist); } else { - if ( rank_specs[i].rfunc == NORMAL || - rank_specs[i].rfunc == PROPORTION ) - tab_output_text (TAT_PRINTF, - _("%s into %s(%s of %s using %s)"), - src_vars[v]->name, - rank_specs[i].destvars[v]->name, - function_name[rank_specs[i].rfunc], - src_vars[v]->name, - fraction_name() - ); - + if ( rank_specs[i].rfunc == NORMAL || + rank_specs[i].rfunc == PROPORTION ) + tab_output_text_format (0, + _("%s into %s(%s of %s using %s)"), + var_get_name (src_vars[v]), + var_get_name (rank_specs[i].destvars[v]), + function_name[rank_specs[i].rfunc], + var_get_name (src_vars[v]), + fraction_name()); + else - tab_output_text (TAT_PRINTF, - _("%s into %s(%s of %s)"), - src_vars[v]->name, - rank_specs[i].destvars[v]->name, - function_name[rank_specs[i].rfunc], - src_vars[v]->name - ); + tab_output_text_format (0, + _("%s into %s(%s of %s)"), + var_get_name (src_vars[v]), + var_get_name (rank_specs[i].destvars[v]), + function_name[rank_specs[i].rfunc], + var_get_name (src_vars[v])); } } } } - if ( cmd.sbc_fraction && + if ( cmd.sbc_fraction && ( ! cmd.sbc_normal && ! cmd.sbc_proportion) ) msg(MW, _("FRACTION has been specified, but NORMAL and PROPORTION rank functions have not been requested. The FRACTION subcommand will be ignored.") ); @@ -901,56 +768,55 @@ cmd_rank (struct dataset *ds) add_transformation (ds, create_resort_key, 0, order); /* Do the ranking */ - result = rank_cmd (ds, sc, rank_specs, n_rank_specs); + result = rank_cmd (ds, &sc, rank_specs, n_rank_specs); - /* Put the active file back in its original order */ + /* Put the active file back in its original order. Delete + our sort key, which we don't need anymore. */ { - struct sort_criteria criteria; - struct sort_criterion restore_criterion ; - restore_criterion.fv = order->fv; - restore_criterion.width = 0; - restore_criterion.dir = SRT_ASCEND; - - criteria.crits = &restore_criterion; - criteria.crit_cnt = 1; - - sort_active_file_in_place (ds, &criteria); -} + struct casereader *sorted; - /* ... and we don't need our sort key anymore. So delete it */ - dict_delete_var (dataset_dict (ds), order); + /* FIXME: loses error conditions. */ + + proc_discard_output (ds); + sorted = sort_execute_1var (proc_open (ds), order); + result = proc_commit (ds) && result; + + dict_delete_var (dataset_dict (ds), order); + result = proc_set_active_file_data (ds, sorted) && result; + } rank_cleanup(); + return (result ? CMD_SUCCESS : CMD_CASCADING_FAILURE); } -/* Parser for the variables sub command +/* Parser for the variables sub command Returns 1 on success */ static int -rank_custom_variables (struct dataset *ds, struct cmd_rank *cmd UNUSED, void *aux UNUSED) +rank_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd UNUSED, void *aux UNUSED) { - static const int terminators[2] = {T_BY, 0}; + lex_match (lexer, '='); - lex_match('='); - - if ((token != T_ID || dict_lookup_var (dataset_dict (ds), tokid) == NULL) - && token != T_ALL) + if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) return 2; - sc = sort_parse_criteria (dataset_dict (ds), - &src_vars, &n_src_vars, 0, terminators); + if (!parse_sort_criteria (lexer, dataset_dict (ds), &sc, &src_vars, NULL)) + return 0; + n_src_vars = subcase_get_n_fields (&sc); - if ( lex_match(T_BY) ) + if ( lex_match (lexer, T_BY) ) { - if ((token != T_ID || dict_lookup_var (dataset_dict (ds), tokid) == NULL)) + if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)) { return 2; } - if (!parse_variables (dataset_dict (ds), &group_vars, &n_group_vars, - PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) + if (!parse_variables_const (lexer, dataset_dict (ds), + &group_vars, &n_group_vars, + PV_NO_DUPLICATE | PV_NO_SCRATCH) ) { free (group_vars); return 0; @@ -963,40 +829,40 @@ rank_custom_variables (struct dataset *ds, struct cmd_rank *cmd UNUSED, void *au /* Parse the [/rank INTO var1 var2 ... varN ] clause */ static int -parse_rank_function(struct dictionary *dict, struct cmd_rank *cmd UNUSED, enum RANK_FUNC f) +parse_rank_function (struct lexer *lexer, struct dictionary *dict, struct cmd_rank *cmd UNUSED, enum RANK_FUNC f) { int var_count = 0; - + n_rank_specs++; rank_specs = xnrealloc(rank_specs, n_rank_specs, sizeof *rank_specs); rank_specs[n_rank_specs - 1].rfunc = f; rank_specs[n_rank_specs - 1].destvars = NULL; - rank_specs[n_rank_specs - 1].destvars = - xcalloc (sc->crit_cnt, sizeof (struct variable *)); - - if (lex_match_id("INTO")) + rank_specs[n_rank_specs - 1].destvars = + xcalloc (subcase_get_n_fields (&sc), sizeof (struct variable *)); + + if (lex_match_id (lexer, "INTO")) { struct variable *destvar; - while( token == T_ID ) + while( lex_token (lexer) == T_ID ) { - if ( dict_lookup_var (dict, tokid) != NULL ) + if ( dict_lookup_var (dict, lex_tokid (lexer)) != NULL ) { - msg(SE, _("Variable %s already exists."), tokid); + msg(SE, _("Variable %s already exists."), lex_tokid (lexer)); return 0; } - if ( var_count >= sc->crit_cnt ) + if ( var_count >= subcase_get_n_fields (&sc) ) { msg(SE, _("Too many variables in INTO clause.")); return 0; } - destvar = create_rank_variable (dict, f, src_vars[var_count], tokid); + destvar = create_rank_variable (dict, f, src_vars[var_count], lex_tokid (lexer)); rank_specs[n_rank_specs - 1].destvars[var_count] = destvar ; - lex_get(); + lex_get (lexer); ++var_count; } } @@ -1006,74 +872,74 @@ parse_rank_function(struct dictionary *dict, struct cmd_rank *cmd UNUSED, enum R static int -rank_custom_rank(struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) +rank_custom_rank (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) { struct dictionary *dict = dataset_dict (ds); - return parse_rank_function (dict, cmd, RANK); + return parse_rank_function (lexer, dict, cmd, RANK); } static int -rank_custom_normal(struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) +rank_custom_normal (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) { struct dictionary *dict = dataset_dict (ds); - return parse_rank_function (dict, cmd, NORMAL); + return parse_rank_function (lexer, dict, cmd, NORMAL); } static int -rank_custom_percent(struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) +rank_custom_percent (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) { struct dictionary *dict = dataset_dict (ds); - return parse_rank_function (dict, cmd, PERCENT); + return parse_rank_function (lexer, dict, cmd, PERCENT); } static int -rank_custom_rfraction(struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) +rank_custom_rfraction (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) { struct dictionary *dict = dataset_dict (ds); - return parse_rank_function (dict, cmd, RFRACTION); + return parse_rank_function (lexer, dict, cmd, RFRACTION); } static int -rank_custom_proportion(struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) +rank_custom_proportion (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) { struct dictionary *dict = dataset_dict (ds); - return parse_rank_function (dict, cmd, PROPORTION); + return parse_rank_function (lexer, dict, cmd, PROPORTION); } static int -rank_custom_n (struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) +rank_custom_n (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) { struct dictionary *dict = dataset_dict (ds); - return parse_rank_function (dict, cmd, N); + return parse_rank_function (lexer, dict, cmd, N); } static int -rank_custom_savage(struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) +rank_custom_savage (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) { struct dictionary *dict = dataset_dict (ds); - return parse_rank_function (dict, cmd, SAVAGE); + return parse_rank_function (lexer, dict, cmd, SAVAGE); } static int -rank_custom_ntiles (struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) +rank_custom_ntiles (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) { struct dictionary *dict = dataset_dict (ds); - if ( lex_force_match('(') ) + if ( lex_force_match (lexer, '(') ) { - if ( lex_force_int() ) + if ( lex_force_int (lexer) ) { - k_ntiles = lex_integer (); - lex_get(); - lex_force_match(')'); + k_ntiles = lex_integer (lexer); + lex_get (lexer); + lex_force_match (lexer, ')'); } else return 0; @@ -1081,5 +947,11 @@ rank_custom_ntiles (struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED ) else return 0; - return parse_rank_function(dict, cmd, NTILES); + return parse_rank_function (lexer, dict, cmd, NTILES); } + +/* + Local Variables: + mode: c + End: +*/