src/language/stats/rank.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2005, 2006, 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include "data/case.h"
  20 #include "data/casegrouper.h"
  21 #include "data/casereader.h"
  22 #include "data/dataset.h"
  23 #include "data/dictionary.h"
  24 #include "data/format.h"
  25 #include "data/variable.h"
  26 #include "data/subcase.h"
  27 #include "data/casewriter.h"
  28 #include "data/short-names.h"
  29
  30 #include "language/command.h"
  31 #include "language/lexer/lexer.h"
  32 #include "language/lexer/variable-parser.h"
  33 #include "language/stats/sort-criteria.h"
  34
  35 #include "math/sort.h"
  36
  37 #include "libpspp/assertion.h"
  38 #include "libpspp/misc.h"
  39 #include "libpspp/taint.h"
  40 #include "libpspp/pool.h"
  41 #include "libpspp/message.h"
  42
  43
  44 #include "output/tab.h"
  45
  46 #include <math.h>
  47
  48 #include <gsl/gsl_cdf.h>
  49
  50 #include "gettext.h"
  51 #define _(msgid) gettext (msgid)
  52 #define N_(msgid) (msgid)
  53
  54 struct rank;
  55
  56 typedef double (*rank_function_t) (const struct rank*, double c, double cc, double cc_1,
  57                                    int i, double w);
  58
  59 static double rank_proportion (const struct rank *, double c, double cc, double cc_1,
  60                                int i, double w);
  61
  62 static double rank_normal (const struct rank *, double c, double cc, double cc_1,
  63                            int i, double w);
  64
  65 static double rank_percent (const struct rank *, double c, double cc, double cc_1,
  66                             int i, double w);
  67
  68 static double rank_rfraction (const struct rank *, double c, double cc, double cc_1,
  69                               int i, double w);
  70
  71 static double rank_rank (const struct rank *, double c, double cc, double cc_1,
  72                          int i, double w);
  73
  74 static double rank_n (const struct rank *, double c, double cc, double cc_1,
  75                       int i, double w);
  76
  77 static double rank_savage (const struct rank *, double c, double cc, double cc_1,
  78                            int i, double w);
  79
  80 static double rank_ntiles (const struct rank *, double c, double cc, double cc_1,
  81                            int i, double w);
  82
  83
  84 enum RANK_FUNC
  85   {
  86     RANK,
  87     NORMAL,
  88     PERCENT,
  89     RFRACTION,
  90     PROPORTION,
  91     N,
  92     NTILES,
  93     SAVAGE,
  94     n_RANK_FUNCS
  95   };
  96
  97 static const struct fmt_spec dest_format[n_RANK_FUNCS] = {
  98   {FMT_F, 9, 3}, /* rank */
  99   {FMT_F, 6, 4}, /* normal */
 100   {FMT_F, 6, 2}, /* percent */
 101   {FMT_F, 6, 4}, /* rfraction */
 102   {FMT_F, 6, 4}, /* proportion */
 103   {FMT_F, 6, 0}, /* n */
 104   {FMT_F, 3, 0}, /* ntiles */
 105   {FMT_F, 8, 4}  /* savage */
 106 };
 107
 108 static const char * const function_name[n_RANK_FUNCS] = {
 109   "RANK",
 110   "NORMAL",
 111   "PERCENT",
 112   "RFRACTION",
 113   "PROPORTION",
 114   "N",
 115   "NTILES",
 116   "SAVAGE"
 117 };
 118
 119 static const rank_function_t rank_func[n_RANK_FUNCS] = {
 120   rank_rank,
 121   rank_normal,
 122   rank_percent,
 123   rank_rfraction,
 124   rank_proportion,
 125   rank_n,
 126   rank_ntiles,
 127   rank_savage
 128 };
 129
 130
 131 enum ties
 132   {
 133     TIES_LOW,
 134     TIES_HIGH,
 135     TIES_MEAN,
 136     TIES_CONDENSE
 137   };
 138
 139 enum fraction
 140   {
 141     FRAC_BLOM,
 142     FRAC_RANKIT,
 143     FRAC_TUKEY,
 144     FRAC_VW
 145   };
 146
 147 struct rank_spec
 148 {
 149   enum RANK_FUNC rfunc;
 150   struct variable **destvars;
 151 };
 152
 153
 154 /* Create and return a new variable in which to store the ranks of SRC_VAR
 155    accoring to the rank function F.
 156    VNAME is the name of the variable to be created.
 157    If VNAME is NULL, then a name will be automatically chosen.
 158 */
 159 static struct variable *
 160 create_rank_variable (struct dictionary *dict, enum RANK_FUNC f,
 161                       const struct variable *src_var,
 162                       const char *vname)
 163 {
 164   int i;
 165   struct variable *var = NULL;
 166   char name[SHORT_NAME_LEN + 1];
 167
 168   if ( vname )
 169     var = dict_create_var(dict, vname, 0);
 170
 171   if ( NULL == var )
 172     {
 173       snprintf (name, SHORT_NAME_LEN + 1, "%c%s",
 174                 function_name[f][0], var_get_name (src_var));
 175
 176       var = dict_create_var(dict, name, 0);
 177     }
 178   i = 1;
 179   while( NULL == var )
 180     {
 181       char func_abb[4];
 182       snprintf(func_abb, 4, "%s", function_name[f]);
 183       snprintf(name, SHORT_NAME_LEN + 1, "%s%03d", func_abb,
 184                i);
 185
 186       var = dict_create_var(dict, name, 0);
 187       if (i++ >= 999)
 188         break;
 189     }
 190
 191   i = 1;
 192   while ( NULL == var )
 193     {
 194       char func_abb[3];
 195       snprintf(func_abb, 3, "%s", function_name[f]);
 196
 197       snprintf(name, SHORT_NAME_LEN + 1,
 198                "RNK%s%02d", func_abb, i);
 199
 200       var = dict_create_var(dict, name, 0);
 201       if ( i++ >= 99 )
 202         break;
 203     }
 204
 205   if ( NULL == var )
 206     {
 207       msg(ME, _("Cannot create new rank variable.  All candidates in use."));
 208       return NULL;
 209     }
 210
 211   var_set_both_formats (var, &dest_format[f]);
 212
 213   return var;
 214 }
 215
 216 struct rank
 217 {
 218   struct dictionary *dict;
 219
 220   struct subcase sc;
 221
 222   const struct variable **vars;
 223   size_t n_vars;
 224
 225   bool ascending;
 226
 227   const struct variable **group_vars;
 228   size_t n_group_vars;
 229
 230
 231   enum mv_class exclude;
 232
 233   struct rank_spec *rs;
 234   size_t n_rs;
 235
 236   enum ties ties;
 237
 238   enum fraction fraction;
 239   int k_ntiles;
 240
 241   bool print;
 242
 243   /* Pool on which cell functions may allocate data */
 244   struct pool *pool;
 245 };
 246
 247
 248 static void
 249 destroy_rank (struct rank *rank)
 250 {
 251  free (rank->vars);
 252  free (rank->group_vars);
 253  subcase_destroy (&rank->sc);
 254  pool_destroy (rank->pool);
 255 }
 256
 257 static bool
 258 parse_into (struct lexer *lexer, struct rank *cmd)
 259 {
 260   int var_count = 0;
 261   struct rank_spec *rs = NULL;
 262
 263   cmd->rs = pool_realloc (cmd->pool, cmd->rs, sizeof (*cmd->rs) * (cmd->n_rs + 1));
 264   rs = &cmd->rs[cmd->n_rs];
 265
 266   if (lex_match_id (lexer, "RANK"))
 267     {
 268       rs->rfunc = RANK;
 269     }
 270   else if (lex_match_id (lexer, "NORMAL"))
 271     {
 272       rs->rfunc = NORMAL;
 273     }
 274   else if (lex_match_id (lexer, "RFRACTION"))
 275     {
 276       rs->rfunc = RFRACTION;
 277     }
 278   else if (lex_match_id (lexer, "N"))
 279     {
 280       rs->rfunc = N;
 281     }
 282   else if (lex_match_id (lexer, "SAVAGE"))
 283     {
 284       rs->rfunc = SAVAGE;
 285     }
 286   else if (lex_match_id (lexer, "PERCENT"))
 287     {
 288       rs->rfunc = PERCENT;
 289     }
 290   else if (lex_match_id (lexer, "PROPORTION"))
 291     {
 292       rs->rfunc = PROPORTION;
 293     }
 294   else if (lex_match_id (lexer, "NTILES"))
 295     {
 296       if ( !lex_force_match (lexer, T_LPAREN))
 297         return false;
 298
 299       if (! lex_force_int (lexer) )
 300         return false;
 301
 302       cmd->k_ntiles = lex_integer (lexer);
 303       lex_get (lexer);
 304
 305       if ( !lex_force_match (lexer, T_RPAREN))
 306         return false;
 307
 308       rs->rfunc = NTILES;
 309     }
 310   else
 311     {
 312       return false;
 313     }
 314
 315   cmd->n_rs++;
 316   rs->destvars = NULL;
 317   rs->destvars = pool_calloc (cmd->pool, cmd->n_vars, sizeof (*rs->destvars));
 318
 319   if (lex_match_id (lexer, "INTO"))
 320     {
 321       while( lex_token (lexer) == T_ID )
 322         {
 323           const char *name = lex_tokcstr (lexer);
 324           if ( dict_lookup_var (cmd->dict, name) != NULL )
 325             {
 326               msg (SE, _("Variable %s already exists."), name);
 327               return false;
 328             }
 329
 330           if ( var_count >= subcase_get_n_fields (&cmd->sc) )
 331             {
 332               msg (SE, _("Too many variables in INTO clause."));
 333               return false;
 334             }
 335           rs->destvars[var_count] =
 336             create_rank_variable (cmd->dict, rs->rfunc, cmd->vars[var_count], name);
 337           ++var_count;
 338           lex_get (lexer);
 339         }
 340     }
 341
 342   return true;
 343 }
 344
 345 /* Hardly a rank function !! */
 346 static double
 347 rank_n (const struct rank *cmd UNUSED, double c UNUSED, double cc UNUSED, double cc_1 UNUSED,
 348         int i UNUSED, double w)
 349 {
 350   return w;
 351 }
 352
 353
 354 static double
 355 rank_rank (const struct rank *cmd, double c, double cc, double cc_1,
 356            int i, double w UNUSED)
 357 {
 358   double rank;
 359
 360   if ( c >= 1.0 )
 361     {
 362       switch (cmd->ties)
 363         {
 364         case TIES_LOW:
 365           rank = cc_1 + 1;
 366           break;
 367         case TIES_HIGH:
 368           rank = cc;
 369           break;
 370         case TIES_MEAN:
 371           rank = cc_1 + (c + 1.0)/ 2.0;
 372           break;
 373         case TIES_CONDENSE:
 374           rank = i;
 375           break;
 376         default:
 377           NOT_REACHED ();
 378         }
 379     }
 380   else
 381     {
 382       switch (cmd->ties)
 383         {
 384         case TIES_LOW:
 385           rank = cc_1;
 386           break;
 387         case TIES_HIGH:
 388           rank = cc;
 389           break;
 390         case TIES_MEAN:
 391           rank = cc_1 + c / 2.0 ;
 392           break;
 393         case TIES_CONDENSE:
 394           rank = i;
 395           break;
 396         default:
 397           NOT_REACHED ();
 398         }
 399     }
 400
 401   return rank;
 402 }
 403
 404
 405 static double
 406 rank_rfraction (const struct rank *cmd, double c, double cc, double cc_1,
 407                 int i, double w)
 408 {
 409   return rank_rank (cmd, c, cc, cc_1, i, w) / w ;
 410 }
 411
 412
 413 static double
 414 rank_percent (const struct rank *cmd, double c, double cc, double cc_1,
 415               int i, double w)
 416 {
 417   return rank_rank (cmd, c, cc, cc_1, i, w) * 100.0 / w ;
 418 }
 419
 420
 421 static double
 422 rank_proportion (const struct rank *cmd, double c, double cc, double cc_1,
 423                  int i, double w)
 424 {
 425   const double r =  rank_rank (cmd, c, cc, cc_1, i, w) ;
 426
 427   double f;
 428
 429   switch ( cmd->fraction )
 430     {
 431     case FRAC_BLOM:
 432       f =  (r - 3.0/8.0) / (w + 0.25);
 433       break;
 434     case FRAC_RANKIT:
 435       f = (r - 0.5) / w ;
 436       break;
 437     case FRAC_TUKEY:
 438       f = (r - 1.0/3.0) / (w + 1.0/3.0);
 439       break;
 440     case FRAC_VW:
 441       f = r / ( w + 1.0);
 442       break;
 443     default:
 444       NOT_REACHED ();
 445     }
 446
 447
 448   return (f > 0) ? f : SYSMIS;
 449 }
 450
 451 static double
 452 rank_normal (const struct rank *cmd, double c, double cc, double cc_1,
 453              int i, double w)
 454 {
 455   double f = rank_proportion (cmd, c, cc, cc_1, i, w);
 456
 457   return gsl_cdf_ugaussian_Pinv (f);
 458 }
 459
 460 static double
 461 rank_ntiles (const struct rank *cmd, double c, double cc, double cc_1,
 462              int i, double w)
 463 {
 464   double r = rank_rank (cmd, c, cc, cc_1, i, w);
 465
 466
 467   return ( floor (( r * cmd->k_ntiles) / ( w + 1) ) + 1);
 468 }
 469
 470 /* Expected value of the order statistics from an exponential distribution */
 471 static double
 472 ee (int j, double w_star)
 473 {
 474   int k;
 475   double sum = 0.0;
 476
 477   for (k = 1 ; k <= j; k++)
 478     sum += 1.0 / ( w_star + 1 - k );
 479
 480   return sum;
 481 }
 482
 483
 484 static double
 485 rank_savage (const struct rank *cmd UNUSED, double c, double cc, double cc_1,
 486              int i UNUSED, double w)
 487 {
 488   double int_part;
 489   const int i_1 = floor (cc_1);
 490   const int i_2 = floor (cc);
 491
 492   const double w_star = (modf (w, &int_part) == 0 ) ? w : floor (w) + 1;
 493
 494   const double g_1 = cc_1 - i_1;
 495   const double g_2 = cc - i_2;
 496
 497   /* The second factor is infinite, when the first is zero.
 498      Therefore, evaluate the second, only when the first is non-zero */
 499   const double expr1 =  (1 - g_1) ? (1 - g_1) * ee(i_1+1, w_star) : ( 1 - g_1);
 500   const double expr2 =  g_2 ? g_2 * ee (i_2+1, w_star) : g_2 ;
 501
 502   if ( i_1 == i_2 )
 503     return ee (i_1 + 1, w_star) - 1;
 504
 505   if ( i_1 + 1 == i_2 )
 506     return ( ( expr1 + expr2 )/c ) - 1;
 507
 508   if ( i_1 + 2 <= i_2 )
 509     {
 510       int j;
 511       double sigma = 0.0;
 512       for (j = i_1 + 2 ; j <= i_2; ++j )
 513         sigma += ee (j, w_star);
 514       return ( (expr1 + expr2 + sigma) / c) -1;
 515     }
 516
 517   NOT_REACHED();
 518 }
 519
 520
 521 static void
 522 rank_sorted_file (struct casereader *input,
 523                   struct casewriter *output,
 524                   const struct dictionary *dict,
 525                   int dest_idx,
 526                   const struct rank *cmd
 527                   )
 528 {
 529   struct casereader *pass1, *pass2, *pass2_1;
 530   struct casegrouper *tie_grouper;
 531   struct ccase *c;
 532   double w = 0.0;
 533   double cc = 0.0;
 534   int tie_group = 1;
 535
 536   input = casereader_create_filter_missing (input, &cmd->vars[dest_idx], 1,
 537                                             cmd->exclude, NULL, output);
 538   input = casereader_create_filter_weight (input, dict, NULL, output);
 539
 540   casereader_split (input, &pass1, &pass2);
 541
 542   /* Pass 1: Get total group weight. */
 543   for (; (c = casereader_read (pass1)) != NULL; case_unref (c))
 544     w += dict_get_case_weight (dict, c, NULL);
 545   casereader_destroy (pass1);
 546
 547   /* Pass 2: Do ranking. */
 548   tie_grouper = casegrouper_create_vars (pass2, &cmd->vars[dest_idx], 1);
 549   while (casegrouper_get_next_group (tie_grouper, &pass2_1))
 550     {
 551       struct casereader *pass2_2;
 552       double cc_1 = cc;
 553       double tw = 0.0;
 554       int i;
 555
 556       pass2_2 = casereader_clone (pass2_1);
 557       taint_propagate (casereader_get_taint (pass2_2),
 558                        casewriter_get_taint (output));
 559
 560       /* Pass 2.1: Sum up weight for tied cases. */
 561       for (; (c = casereader_read (pass2_1)) != NULL; case_unref (c))
 562         tw += dict_get_case_weight (dict, c, NULL);
 563       cc += tw;
 564       casereader_destroy (pass2_1);
 565
 566       /* Pass 2.2: Rank tied cases. */
 567       while ((c = casereader_read (pass2_2)) != NULL)
 568         {
 569           c = case_unshare (c);
 570           for (i = 0; i < cmd->n_rs; ++i)
 571             {
 572               const struct variable *dst_var = cmd->rs[i].destvars[dest_idx];
 573               double *dst_value = &case_data_rw (c, dst_var)->f;
 574               *dst_value = rank_func[cmd->rs[i].rfunc] (cmd, tw, cc, cc_1, tie_group, w);
 575             }
 576           casewriter_write (output, c);
 577         }
 578       casereader_destroy (pass2_2);
 579
 580       tie_group++;
 581     }
 582   casegrouper_destroy (tie_grouper);
 583 }
 584
 585
 586 /* Transformation function to enumerate all the cases */
 587 static int
 588 create_resort_key (void *key_var_, struct ccase **cc, casenumber case_num)
 589 {
 590   struct variable *key_var = key_var_;
 591
 592   *cc = case_unshare (*cc);
 593   case_data_rw (*cc, key_var)->f = case_num;
 594
 595   return TRNS_CONTINUE;
 596 }
 597
 598 static bool
 599 rank_cmd (struct dataset *ds,  const struct rank *cmd);
 600
 601
 602 static const char *
 603 fraction_name (const struct rank *cmd)
 604 {
 605   static char name[10];
 606   switch (cmd->fraction )
 607     {
 608     case FRAC_BLOM:
 609       strcpy (name, "BLOM");
 610       break;
 611     case FRAC_RANKIT:
 612       strcpy (name, "RANKIT");
 613       break;
 614     case FRAC_TUKEY:
 615       strcpy (name, "TUKEY");
 616       break;
 617     case FRAC_VW:
 618       strcpy (name, "VW");
 619       break;
 620     default:
 621       NOT_REACHED ();
 622     }
 623   return name;
 624 }
 625
 626 /* Create a label on DEST_VAR, describing its derivation from SRC_VAR and F */
 627 static void
 628 create_var_label (struct rank *cmd, struct variable *dest_var,
 629                   const struct variable *src_var, enum RANK_FUNC f)
 630 {
 631   struct string label;
 632   ds_init_empty (&label);
 633
 634   if ( cmd->n_group_vars > 0 )
 635     {
 636       struct string group_var_str;
 637       int g;
 638
 639       ds_init_empty (&group_var_str);
 640
 641       for (g = 0 ; g < cmd->n_group_vars ; ++g )
 642         {
 643           if ( g > 0 ) ds_put_cstr (&group_var_str, " ");
 644           ds_put_cstr (&group_var_str, var_get_name (cmd->group_vars[g]));
 645         }
 646
 647       ds_put_format (&label, _("%s of %s by %s"), function_name[f],
 648                      var_get_name (src_var), ds_cstr (&group_var_str));
 649       ds_destroy (&group_var_str);
 650     }
 651   else
 652     ds_put_format (&label, _("%s of %s"),
 653                    function_name[f], var_get_name (src_var));
 654
 655   var_set_label (dest_var, ds_cstr (&label), false);
 656
 657   ds_destroy (&label);
 658 }
 659
 660 int
 661 cmd_rank (struct lexer *lexer, struct dataset *ds)
 662 {
 663   struct rank rank;
 664   struct variable *order;
 665   bool result = true;
 666   int i;
 667
 668   subcase_init_empty (&rank.sc);
 669
 670   rank.rs = NULL;
 671   rank.n_rs = 0;
 672   rank.exclude = MV_ANY;
 673   rank.n_group_vars = 0;
 674   rank.group_vars = NULL;
 675   rank.dict = dataset_dict (ds);
 676   rank.ties = TIES_MEAN;
 677   rank.fraction = FRAC_BLOM;
 678   rank.print = true;
 679   rank.pool = pool_create ();
 680
 681   if (lex_match_id (lexer, "VARIABLES"))
 682     lex_force_match (lexer, T_EQUALS);
 683
 684   if (!parse_sort_criteria (lexer, rank.dict,
 685                             &rank.sc,
 686                             &rank.vars,
 687                             &rank.ascending))
 688     goto error;
 689
 690   rank.n_vars = rank.sc.n_fields;
 691
 692   if (lex_match (lexer, T_BY) )
 693     {
 694       if ( ! parse_variables_const (lexer, rank.dict,
 695                                     &rank.group_vars, &rank.n_group_vars,
 696                                     PV_NO_DUPLICATE | PV_NO_SCRATCH))
 697         goto error;
 698     }
 699
 700
 701   while (lex_token (lexer) != T_ENDCMD )
 702     {
 703       lex_force_match (lexer, T_SLASH);
 704       if (lex_match_id (lexer, "TIES"))
 705         {
 706           lex_force_match (lexer, T_EQUALS);
 707           if (lex_match_id (lexer, "MEAN"))
 708             {
 709               rank.ties = TIES_MEAN;
 710             }
 711           else if (lex_match_id (lexer, "LOW"))
 712             {
 713               rank.ties = TIES_LOW;
 714             }
 715           else if (lex_match_id (lexer, "HIGH"))
 716             {
 717               rank.ties = TIES_HIGH;
 718             }
 719           else if (lex_match_id (lexer, "CONDENSE"))
 720             {
 721               rank.ties = TIES_CONDENSE;
 722             }
 723           else
 724             {
 725               lex_error (lexer, NULL);
 726               goto error;
 727             }
 728         }
 729       else if (lex_match_id (lexer, "FRACTION"))
 730         {
 731           lex_force_match (lexer, T_EQUALS);
 732           if (lex_match_id (lexer, "BLOM"))
 733             {
 734               rank.fraction = FRAC_BLOM;
 735             }
 736           else if (lex_match_id (lexer, "TUKEY"))
 737             {
 738               rank.fraction = FRAC_TUKEY;
 739             }
 740           else if (lex_match_id (lexer, "VW"))
 741             {
 742               rank.fraction = FRAC_VW;
 743             }
 744           else if (lex_match_id (lexer, "RANKIT"))
 745             {
 746               rank.fraction = FRAC_RANKIT;
 747             }
 748           else
 749             {
 750               lex_error (lexer, NULL);
 751               goto error;
 752             }
 753         }
 754       else if (lex_match_id (lexer, "PRINT"))
 755         {
 756           lex_force_match (lexer, T_EQUALS);
 757           if (lex_match_id (lexer, "YES"))
 758             {
 759               rank.print = true;
 760             }
 761           else if (lex_match_id (lexer, "NO"))
 762             {
 763               rank.print = false;
 764             }
 765           else
 766             {
 767               lex_error (lexer, NULL);
 768               goto error;
 769             }
 770         }
 771       else if (lex_match_id (lexer, "MISSING"))
 772         {
 773           lex_force_match (lexer, T_EQUALS);
 774           if (lex_match_id (lexer, "INCLUDE"))
 775             {
 776               rank.exclude = MV_SYSTEM;
 777             }
 778           else if (lex_match_id (lexer, "EXCLUDE"))
 779             {
 780               rank.exclude = MV_ANY;
 781             }
 782           else
 783             {
 784               lex_error (lexer, NULL);
 785               goto error;
 786             }
 787         }
 788       else if (! parse_into (lexer, &rank))
 789         goto error;
 790     }
 791
 792
 793   /* If no rank specs are given, then apply a default */
 794   if ( rank.n_rs == 0)
 795     {
 796       rank.rs = pool_calloc (rank.pool, 1, sizeof (*rank.rs));
 797       rank.n_rs = 1;
 798       rank.rs[0].rfunc = RANK;
 799       rank.rs[0].destvars = pool_calloc (rank.pool, rank.n_vars, sizeof (*rank.rs[0].destvars));
 800     }
 801
 802   /* Create variables for all rank destinations which haven't
 803      already been created with INTO.
 804      Add labels to all the destination variables.
 805   */
 806   for (i = 0 ; i <  rank.n_rs ; ++i )
 807     {
 808       int v;
 809       struct rank_spec *rs = &rank.rs[i];
 810
 811       for ( v = 0 ; v < rank.n_vars ;  v ++ )
 812         {
 813           if ( rs->destvars[v] == NULL )
 814             {
 815               rs->destvars[v] =
 816                 create_rank_variable (rank.dict, rs->rfunc, rank.vars[v], NULL);
 817             }
 818
 819           create_var_label (&rank, rs->destvars[v],
 820                             rank.vars[v],
 821                             rs->rfunc);
 822         }
 823     }
 824
 825   if ( rank.print )
 826     {
 827       int v;
 828
 829       tab_output_text (0, _("Variables Created By RANK"));
 830       tab_output_text (0, "");
 831
 832       for (i = 0 ; i <  rank.n_rs ; ++i )
 833         {
 834           for ( v = 0 ; v < rank.n_vars ;  v ++ )
 835             {
 836               if ( rank.n_group_vars > 0 )
 837                 {
 838                   struct string varlist;
 839                   int g;
 840
 841                   ds_init_empty (&varlist);
 842                   for ( g = 0 ; g < rank.n_group_vars ; ++g )
 843                     {
 844                       ds_put_cstr (&varlist, var_get_name (rank.group_vars[g]));
 845
 846                       if ( g < rank.n_group_vars - 1)
 847                         ds_put_cstr (&varlist, " ");
 848                     }
 849
 850                   if ( rank.rs[i].rfunc == NORMAL ||
 851                        rank.rs[i].rfunc == PROPORTION )
 852                     tab_output_text_format (0,
 853                                             _("%s into %s(%s of %s using %s BY %s)"),
 854                                             var_get_name (rank.vars[v]),
 855                                             var_get_name (rank.rs[i].destvars[v]),
 856                                             function_name[rank.rs[i].rfunc],
 857                                             var_get_name (rank.vars[v]),
 858                                             fraction_name (&rank),
 859                                             ds_cstr (&varlist));
 860
 861                   else
 862                     tab_output_text_format (0,
 863                                             _("%s into %s(%s of %s BY %s)"),
 864                                             var_get_name (rank.vars[v]),
 865                                             var_get_name (rank.rs[i].destvars[v]),
 866                                             function_name[rank.rs[i].rfunc],
 867                                             var_get_name (rank.vars[v]),
 868                                             ds_cstr (&varlist));
 869                   ds_destroy (&varlist);
 870                 }
 871               else
 872                 {
 873                   if ( rank.rs[i].rfunc == NORMAL ||
 874                        rank.rs[i].rfunc == PROPORTION )
 875                     tab_output_text_format (0,
 876                                             _("%s into %s(%s of %s using %s)"),
 877                                             var_get_name (rank.vars[v]),
 878                                             var_get_name (rank.rs[i].destvars[v]),
 879                                             function_name[rank.rs[i].rfunc],
 880                                             var_get_name (rank.vars[v]),
 881                                             fraction_name (&rank));
 882
 883                   else
 884                     tab_output_text_format (0,
 885                                             _("%s into %s(%s of %s)"),
 886                                             var_get_name (rank.vars[v]),
 887                                             var_get_name (rank.rs[i].destvars[v]),
 888                                             function_name[rank.rs[i].rfunc],
 889                                             var_get_name (rank.vars[v]));
 890                 }
 891             }
 892         }
 893     }
 894
 895   /* Add a variable which we can sort by to get back the original
 896      order */
 897   order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0);
 898
 899   add_transformation (ds, create_resort_key, 0, order);
 900
 901   /* Do the ranking */
 902   result = rank_cmd (ds, &rank);
 903
 904   /* Put the active dataset back in its original order.  Delete
 905      our sort key, which we don't need anymore.  */
 906   {
 907     struct casereader *sorted;
 908
 909
 910     /* FIXME: loses error conditions. */
 911
 912     proc_discard_output (ds);
 913     sorted = sort_execute_1var (proc_open (ds), order);
 914     result = proc_commit (ds) && result;
 915
 916     dict_delete_var (dataset_dict (ds), order);
 917     result = dataset_set_source (ds, sorted) && result;
 918     if ( result != true)
 919       goto error;
 920   }
 921
 922   destroy_rank (&rank);
 923   return CMD_SUCCESS;
 924
 925  error:
 926
 927   destroy_rank (&rank);
 928   return CMD_FAILURE;
 929 }
 930
 931
 932
 933 static bool
 934 rank_cmd (struct dataset *ds, const struct rank *cmd)
 935 {
 936   struct dictionary *d = dataset_dict (ds);
 937   bool ok = true;
 938   int i;
 939
 940   for (i = 0 ; i < subcase_get_n_fields (&cmd->sc) ; ++i )
 941     {
 942       /* Rank variable at index I in SC. */
 943       struct casegrouper *split_grouper;
 944       struct casereader *split_group;
 945       struct casewriter *output;
 946
 947       proc_discard_output (ds);
 948       split_grouper = casegrouper_create_splits (proc_open (ds), d);
 949       output = autopaging_writer_create (dict_get_proto (d));
 950
 951       while (casegrouper_get_next_group (split_grouper, &split_group))
 952         {
 953           struct subcase ordering;
 954           struct casereader *ordered;
 955           struct casegrouper *by_grouper;
 956           struct casereader *by_group;
 957
 958           /* Sort this split group by the BY variables as primary
 959              keys and the rank variable as secondary key. */
 960           subcase_init_vars (&ordering, cmd->group_vars, cmd->n_group_vars);
 961           subcase_add_var (&ordering, cmd->vars[i],
 962                            subcase_get_direction (&cmd->sc, i));
 963           ordered = sort_execute (split_group, &ordering);
 964           subcase_destroy (&ordering);
 965
 966           /* Rank the rank variable within this split group. */
 967           by_grouper = casegrouper_create_vars (ordered,
 968                                                 cmd->group_vars, cmd->n_group_vars);
 969           while (casegrouper_get_next_group (by_grouper, &by_group))
 970             {
 971               /* Rank the rank variable within this BY group
 972                  within the split group. */
 973
 974               rank_sorted_file (by_group, output, d,  i, cmd);
 975
 976             }
 977           ok = casegrouper_destroy (by_grouper) && ok;
 978         }
 979       ok = casegrouper_destroy (split_grouper);
 980       ok = proc_commit (ds) && ok;
 981       ok = (dataset_set_source (ds, casewriter_make_reader (output))
 982             && ok);
 983       if (!ok)
 984         break;
 985     }
 986
 987   return ok;
 988 }