+
+static double
+rank_proportion (double c, double cc, double cc_1,
+ int i, double w)
+{
+ const double r = rank_rank (c, cc, cc_1, i, w) ;
+
+ double f;
+
+ switch ( cmd.fraction )
+ {
+ case RANK_BLOM:
+ f = (r - 3.0/8.0) / (w + 0.25);
+ break;
+ case RANK_RANKIT:
+ f = (r - 0.5) / w ;
+ break;
+ case RANK_TUKEY:
+ f = (r - 1.0/3.0) / (w + 1.0/3.0);
+ break;
+ case RANK_VW:
+ f = r / ( w + 1.0);
+ break;
+ default:
+ NOT_REACHED ();
+ }
+
+
+ return (f > 0) ? f : SYSMIS;
+}
+
+static double
+rank_normal (double c, double cc, double cc_1,
+ int i, double w)
+{
+ double f = rank_proportion (c, cc, cc_1, i, w);
+
+ return gsl_cdf_ugaussian_Pinv (f);
+}
+
+static double
+rank_ntiles (double c, double cc, double cc_1,
+ int i, double w)
+{
+ double r = rank_rank (c, cc, cc_1, i, w);
+
+
+ return ( floor (( r * k_ntiles) / ( w + 1) ) + 1);
+}
+
+/* Expected value of the order statistics from an exponential distribution */
+static double
+ee (int j, double w_star)
+{
+ int k;
+ double sum = 0.0;
+
+ for (k = 1 ; k <= j; k++)
+ sum += 1.0 / ( w_star + 1 - k );
+
+ return sum;
+}
+
+
+static double
+rank_savage (double c, double cc, double cc_1,
+ int i UNUSED, double w)
+{
+ double int_part;
+ const int i_1 = floor (cc_1);
+ const int i_2 = floor (cc);
+
+ const double w_star = (modf (w, &int_part) == 0 ) ? w : floor (w) + 1;
+
+ const double g_1 = cc_1 - i_1;
+ const double g_2 = cc - i_2;
+
+ /* The second factor is infinite, when the first is zero.
+ Therefore, evaluate the second, only when the first is non-zero */
+ const double expr1 = (1 - g_1) ? (1 - g_1) * ee(i_1+1, w_star) : ( 1 - g_1);
+ const double expr2 = g_2 ? g_2 * ee (i_2+1, w_star) : g_2 ;
+
+ if ( i_1 == i_2 )
+ return ee (i_1 + 1, w_star) - 1;
+
+ if ( i_1 + 1 == i_2 )
+ return ( ( expr1 + expr2 )/c ) - 1;
+
+ if ( i_1 + 2 <= i_2 )
+ {
+ int j;
+ double sigma = 0.0;
+ for (j = i_1 + 2 ; j <= i_2; ++j )
+ sigma += ee (j, w_star);
+ return ( (expr1 + expr2 + sigma) / c) -1;
+ }
+
+ NOT_REACHED();
+}
+
+
+/* Rank the casefile belonging to CR, starting from the current
+ postition of CR continuing up to and including the ENDth case.
+
+ RS points to an array containing the rank specifications to
+ use. N_RANK_SPECS is the number of elements of RS.
+
+
+ DEST_VAR_INDEX is the index into the rank_spec destvar element
+ to be used for this ranking.
+
+ Prerequisites: 1. The casefile must be sorted according to CRITERION.
+ 2. W is the sum of the non-missing caseweights for this
+ range of the casefile.
+*/
+static void
+rank_cases (struct casereader *cr,
+ unsigned long end,
+ const struct dictionary *dict,
+ const struct sort_criterion *criterion,
+ const struct missing_values *mv,
+ double w,
+ const struct rank_spec *rs,
+ int n_rank_specs,
+ int dest_var_index,
+ struct casefile *dest)
+{
+ bool warn = true;
+ double cc = 0.0;
+ double cc_1;
+ int iter = 1;
+
+ const int fv = criterion->fv;
+ const int width = criterion->width;
+
+ while (casereader_cnum (cr) < end)
+ {
+ struct casereader *lookahead;
+ const union value *this_value;
+ struct ccase this_case, lookahead_case;
+ double c;
+ int i;
+ size_t n = 0;
+
+ if (!casereader_read_xfer (cr, &this_case))
+ break;
+
+ this_value = case_data (&this_case, fv);
+ c = dict_get_case_weight (dict, &this_case, &warn);
+
+ lookahead = casereader_clone (cr);
+ n = 0;
+ while (casereader_cnum (lookahead) < end
+ && casereader_read_xfer (lookahead, &lookahead_case))
+ {
+ const union value *lookahead_value = case_data (&lookahead_case, fv);
+ int diff = compare_values (this_value, lookahead_value, width);
+
+ if (diff != 0)
+ {
+ /* Make sure the casefile was sorted */
+ assert ( diff == ((criterion->dir == SRT_ASCEND) ? -1 :1));
+
+ case_destroy (&lookahead_case);
+ break;
+ }
+
+ c += dict_get_case_weight (dict, &lookahead_case, &warn);
+ case_destroy (&lookahead_case);
+ n++;
+ }
+ casereader_destroy (lookahead);
+
+ cc_1 = cc;
+ if ( !value_is_missing (mv, this_value) )
+ cc += c;
+
+ do
+ {
+ for (i = 0; i < n_rank_specs; ++i)
+ {
+ const int dest_idx = rs[i].destvars[dest_var_index]->fv;
+
+ if ( value_is_missing (mv, this_value) )
+ case_data_rw (&this_case, dest_idx)->f = SYSMIS;
+ else
+ case_data_rw (&this_case, dest_idx)->f =
+ rank_func[rs[i].rfunc](c, cc, cc_1, iter, w);
+ }
+ casefile_append_xfer (dest, &this_case);
+ }
+ while (n-- > 0 && casereader_read_xfer (cr, &this_case));
+
+ if ( !value_is_missing (mv, this_value) )
+ iter++;
+ }
+
+ /* If this isn't true, then all the results will be wrong */
+ assert ( w == cc );
+}
+
+static bool
+same_group (const struct ccase *a, const struct ccase *b,
+ const struct sort_criteria *crit)
+{
+ size_t i;
+
+ for (i = 0; i < crit->crit_cnt - 1; i++)
+ {
+ struct sort_criterion *c = &crit->crits[i];
+ if (compare_values (case_data (a, c->fv), case_data (b, c->fv),
+ c->width) != 0)
+ return false;
+ }
+
+ return true;
+}
+
+static struct casefile *
+rank_sorted_casefile (struct casefile *cf,
+ const struct sort_criteria *crit,
+ const struct dictionary *dict,
+ const struct rank_spec *rs,
+ int n_rank_specs,
+ int dest_idx,
+ const struct missing_values *mv)
+{
+ struct casefile *dest = fastfile_create (casefile_get_value_cnt (cf));
+ struct casereader *lookahead = casefile_get_reader (cf, NULL);
+ struct casereader *pos = casereader_clone (lookahead);
+ struct ccase group_case;
+ bool warn = true;
+
+ struct sort_criterion *ultimate_crit = &crit->crits[crit->crit_cnt - 1];
+
+ if (casereader_read (lookahead, &group_case))
+ {
+ struct ccase this_case;
+ const union value *this_value ;
+ double w = 0.0;
+ this_value = case_data( &group_case, ultimate_crit->fv);
+
+ if ( !value_is_missing(mv, this_value) )
+ w = dict_get_case_weight (dict, &group_case, &warn);
+
+ while (casereader_read (lookahead, &this_case))
+ {
+ const union value *this_value =
+ case_data(&this_case, ultimate_crit->fv);
+ double c = dict_get_case_weight (dict, &this_case, &warn);
+ if (!same_group (&group_case, &this_case, crit))
+ {
+ rank_cases (pos, casereader_cnum (lookahead) - 1,
+ dict,
+ ultimate_crit,
+ mv, w,
+ rs, n_rank_specs,
+ dest_idx, dest);
+
+ w = 0.0;
+ case_destroy (&group_case);
+ case_move (&group_case, &this_case);
+ }
+ if ( !value_is_missing (mv, this_value) )
+ w += c;
+ case_destroy (&this_case);
+ }
+ case_destroy (&group_case);
+ rank_cases (pos, ULONG_MAX, dict, ultimate_crit, mv, w,
+ rs, n_rank_specs, dest_idx, dest);
+ }
+
+ if (casefile_error (dest))
+ {
+ casefile_destroy (dest);
+ dest = NULL;
+ }
+
+ casefile_destroy (cf);
+ return dest;
+}
+
+
+/* Transformation function to enumerate all the cases */
+static int
+create_resort_key (void *key_var_, struct ccase *cc, casenumber case_num)
+{
+ struct variable *key_var = key_var_;
+
+ case_data_rw(cc, key_var->fv)->f = case_num;
+
+ return TRNS_CONTINUE;
+}
+
+
+/* Create and return a new variable in which to store the ranks of SRC_VAR
+ accoring to the rank function F.
+ VNAME is the name of the variable to be created.
+ If VNAME is NULL, then a name will be automatically chosen.
+ */
+static struct variable *
+create_rank_variable (struct dictionary *dict, enum RANK_FUNC f,
+ const struct variable *src_var,
+ const char *vname)
+{
+ int i;
+ struct variable *var = NULL;
+ char name[SHORT_NAME_LEN + 1];
+
+ if ( vname )
+ var = dict_create_var(dict, vname, 0);
+
+ if ( NULL == var )
+ {
+ snprintf(name, SHORT_NAME_LEN + 1, "%c%s",
+ function_name[f][0], src_var->name);
+
+ var = dict_create_var(dict, name, 0);
+ }
+
+ i = 1;
+ while( NULL == var )
+ {
+ char func_abb[4];
+ snprintf(func_abb, 4, "%s", function_name[f]);
+ snprintf(name, SHORT_NAME_LEN + 1, "%s%03d", func_abb,
+ i);
+
+ var = dict_create_var(dict, name, 0);
+ if (i++ >= 999)
+ break;
+ }
+
+ i = 1;
+ while ( NULL == var )
+ {
+ char func_abb[3];
+ snprintf(func_abb, 3, "%s", function_name[f]);
+
+ snprintf(name, SHORT_NAME_LEN + 1,
+ "RNK%s%02d", func_abb, i);
+
+ var = dict_create_var(dict, name, 0);
+ if ( i++ >= 99 )
+ break;
+ }
+
+ if ( NULL == var )
+ {
+ msg(ME, _("Cannot create new rank variable. All candidates in use."));
+ return NULL;
+ }
+
+ var->write = var->print = dest_format[f];
+
+ return var;
+}
+
+int cmd_rank(struct dataset *ds);
+
+static void
+rank_cleanup(void)
+{
+ int i;
+
+ free (group_vars);
+ group_vars = NULL;
+ n_group_vars = 0;
+
+ for (i = 0 ; i < n_rank_specs ; ++i )
+ {
+ free (rank_specs[i].destvars);
+ }
+
+ free (rank_specs);
+ rank_specs = NULL;
+ n_rank_specs = 0;
+
+ sort_destroy_criteria (sc);
+ sc = NULL;
+
+ free (src_vars);
+ src_vars = NULL;
+ n_src_vars = 0;
+}