src/language/stats/rank.q

   1 /* PSPP - RANK. -*-c-*-
   2
   3 Copyright (C) 2005, 2006 Free Software Foundation, Inc.
   4 Author: John Darrington <john@darrington.wattle.id.au>,
   5         Ben Pfaff <blp@gnu.org>.
   6
   7 This program is free software; you can redistribute it and/or
   8 modify it under the terms of the GNU General Public License as
   9 published by the Free Software Foundation; either version 2 of the
  10 License, or (at your option) any later version.
  11
  12 This program is distributed in the hope that it will be useful, but
  13 WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program; if not, write to the Free Software
  19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA. */
  21
  22 #include <config.h>
  23
  24 #include "sort-criteria.h"
  25
  26 #include <data/dictionary.h>
  27 #include <data/procedure.h>
  28 #include <data/variable.h>
  29 #include <data/case.h>
  30 #include <data/casefile.h>
  31 #include <data/fastfile.h>
  32 #include <data/storage-stream.h>
  33 #include <language/command.h>
  34 #include <language/stats/sort-criteria.h>
  35 #include <limits.h>
  36 #include <libpspp/compiler.h>
  37 #include <math/sort.h>
  38 #include <output/table.h>
  39 #include <output/manager.h>
  40
  41 #include <gsl/gsl_cdf.h>
  42 #include <math.h>
  43
  44 #include "gettext.h"
  45 #define _(msgid) gettext (msgid)
  46
  47 /* (headers) */
  48
  49 /* (specification)
  50    "RANK" (rank_):
  51    *^variables=custom;
  52    +rank=custom;
  53    +normal=custom;
  54    +percent=custom;
  55    +ntiles=custom;
  56    +rfraction=custom;
  57    +proportion=custom;
  58    +n=custom;
  59    +savage=custom;
  60    +print=print:!yes/no;
  61    +fraction=fraction:!blom/tukey/vw/rankit;
  62    +ties=ties:!mean/low/high/condense;
  63    missing=miss:!exclude/include.
  64 */
  65 /* (declarations) */
  66 /* (functions) */
  67
  68 typedef double (*rank_function_t) (double c, double cc, double cc_1,
  69                                  int i, double w);
  70
  71 static double rank_proportion (double c, double cc, double cc_1,
  72                                int i, double w);
  73
  74 static double rank_normal (double c, double cc, double cc_1,
  75                            int i, double w);
  76
  77 static double rank_percent (double c, double cc, double cc_1,
  78                             int i, double w);
  79
  80 static double rank_rfraction (double c, double cc, double cc_1,
  81                               int i, double w);
  82
  83 static double rank_rank (double c, double cc, double cc_1,
  84                          int i, double w);
  85
  86 static double rank_n (double c, double cc, double cc_1,
  87                       int i, double w);
  88
  89 static double rank_savage (double c, double cc, double cc_1,
  90                       int i, double w);
  91
  92 static double rank_ntiles (double c, double cc, double cc_1,
  93                       int i, double w);
  94
  95
  96 enum RANK_FUNC
  97   {
  98     RANK,
  99     NORMAL,
 100     PERCENT,
 101     RFRACTION,
 102     PROPORTION,
 103     N,
 104     NTILES,
 105     SAVAGE,
 106     n_RANK_FUNCS
 107   };
 108
 109 static const struct fmt_spec dest_format[n_RANK_FUNCS] = {
 110   {FMT_F, 9, 3}, /* rank */
 111   {FMT_F, 6, 4}, /* normal */
 112   {FMT_F, 6, 2}, /* percent */
 113   {FMT_F, 6, 4}, /* rfraction */
 114   {FMT_F, 6, 4}, /* proportion */
 115   {FMT_F, 6, 0}, /* n */
 116   {FMT_F, 3, 0}, /* ntiles */
 117   {FMT_F, 8, 4}  /* savage */
 118 };
 119
 120 static const char * const function_name[n_RANK_FUNCS] = {
 121   "RANK",
 122   "NORMAL",
 123   "PERCENT",
 124   "RFRACTION",
 125   "PROPORTION",
 126   "N",
 127   "NTILES",
 128   "SAVAGE"
 129 };
 130
 131 static const rank_function_t rank_func[n_RANK_FUNCS] = {
 132   rank_rank,
 133   rank_normal,
 134   rank_percent,
 135   rank_rfraction,
 136   rank_proportion,
 137   rank_n,
 138   rank_ntiles,
 139   rank_savage
 140   };
 141
 142
 143 struct rank_spec
 144 {
 145   enum RANK_FUNC rfunc;
 146   struct variable **destvars;
 147 };
 148
 149
 150 /* Function to use for testing for missing values */
 151 static is_missing_func *value_is_missing;
 152
 153 static struct rank_spec *rank_specs;
 154 static size_t n_rank_specs;
 155
 156 static struct sort_criteria *sc;
 157
 158 static struct variable **group_vars;
 159 static size_t n_group_vars;
 160
 161 static struct variable **src_vars;
 162 static size_t n_src_vars;
 163
 164
 165 static int k_ntiles;
 166
 167 static struct cmd_rank cmd;
 168
 169 static struct casefile *rank_sorted_casefile (struct casefile *cf,
 170                                               const struct sort_criteria *,
 171                                               const struct dictionary *,
 172                                               const struct rank_spec *rs,
 173                                               int n_rank_specs,
 174                                               int idx,
 175                                               const struct missing_values *miss
 176                                               );
 177 static const char *
 178 fraction_name(void)
 179 {
 180   static char name[10];
 181   switch ( cmd.fraction )
 182     {
 183     case RANK_BLOM:
 184       strcpy (name, "BLOM");
 185       break;
 186     case RANK_RANKIT:
 187       strcpy (name, "RANKIT");
 188       break;
 189     case RANK_TUKEY:
 190       strcpy (name, "TUKEY");
 191       break;
 192     case RANK_VW:
 193       strcpy (name, "VW");
 194       break;
 195     default:
 196       NOT_REACHED ();
 197     }
 198   return name;
 199 }
 200
 201 /* Create a label on DEST_VAR, describing its derivation from SRC_VAR and F */
 202 static void
 203 create_var_label (struct variable *dest_var,
 204                   const struct variable *src_var, enum RANK_FUNC f)
 205 {
 206   struct string label;
 207   ds_init_empty (&label);
 208
 209   if ( n_group_vars > 0 )
 210     {
 211       struct string group_var_str;
 212       int g;
 213
 214       ds_init_empty (&group_var_str);
 215
 216       for (g = 0 ; g < n_group_vars ; ++g )
 217         {
 218           if ( g > 0 ) ds_put_cstr (&group_var_str, " ");
 219           ds_put_cstr (&group_var_str, group_vars[g]->name);
 220         }
 221
 222       ds_put_format (&label, _("%s of %s by %s"), function_name[f],
 223                      src_var->name, ds_cstr (&group_var_str));
 224       ds_destroy (&group_var_str);
 225     }
 226   else
 227     ds_put_format (&label,_("%s of %s"), function_name[f], src_var->name);
 228
 229   dest_var->label = strdup (ds_cstr (&label) );
 230
 231   ds_destroy (&label);
 232 }
 233
 234
 235 static bool
 236 rank_cmd (struct dataset *ds, const struct sort_criteria *sc,
 237           const struct rank_spec *rank_specs, int n_rank_specs)
 238 {
 239   struct sort_criteria criteria;
 240   bool result = true;
 241   int i;
 242   const int n_splits = dict_get_split_cnt (dataset_dict (ds));
 243
 244   criteria.crit_cnt = n_splits + n_group_vars + 1;
 245   criteria.crits = xnmalloc (criteria.crit_cnt, sizeof *criteria.crits);
 246   for (i = 0; i < n_splits ; i++)
 247     {
 248       struct variable *v = dict_get_split_vars (dataset_dict (ds))[i];
 249       criteria.crits[i].fv = v->fv;
 250       criteria.crits[i].width = v->width;
 251       criteria.crits[i].dir = SRT_ASCEND;
 252     }
 253   for (i = 0; i < n_group_vars; i++)
 254     {
 255       criteria.crits[i + n_splits].fv = group_vars[i]->fv;
 256       criteria.crits[i + n_splits].width = group_vars[i]->width;
 257       criteria.crits[i + n_splits].dir = SRT_ASCEND;
 258     }
 259   for (i = 0 ; i < sc->crit_cnt ; ++i )
 260     {
 261       struct casefile *out ;
 262       struct casefile *cf ;
 263       struct casereader *reader ;
 264       struct casefile *sorted_cf ;
 265
 266       /* Obtain active file in CF. */
 267       if (!procedure (ds, NULL, NULL))
 268         goto error;
 269
 270       cf = proc_capture_output (ds);
 271
 272       /* Sort CF into SORTED_CF. */
 273       reader = casefile_get_destructive_reader (cf) ;
 274       criteria.crits[criteria.crit_cnt - 1] = sc->crits[i];
 275       assert ( sc->crits[i].fv == src_vars[i]->fv );
 276       sorted_cf = sort_execute (reader, &criteria);
 277       casefile_destroy (cf);
 278
 279       out = rank_sorted_casefile (sorted_cf, &criteria,
 280                                   dataset_dict (ds),
 281                                   rank_specs, n_rank_specs,
 282                                   i, &src_vars[i]->miss)  ;
 283       if ( NULL == out )
 284         {
 285           result = false ;
 286           continue ;
 287         }
 288
 289       proc_set_source (ds, storage_source_create (out));
 290     }
 291
 292   free (criteria.crits);
 293   return result ;
 294
 295 error:
 296   free (criteria.crits);
 297   return false ;
 298 }
 299
 300 /* Hardly a rank function !! */
 301 static double
 302 rank_n (double c UNUSED, double cc UNUSED, double cc_1 UNUSED,
 303           int i UNUSED, double w)
 304 {
 305   return w;
 306 }
 307
 308
 309 static double
 310 rank_rank (double c, double cc, double cc_1,
 311           int i, double w UNUSED)
 312 {
 313   double rank;
 314   if ( c >= 1.0 )
 315     {
 316       switch (cmd.ties)
 317         {
 318         case RANK_LOW:
 319           rank = cc_1 + 1;
 320           break;
 321         case RANK_HIGH:
 322           rank = cc;
 323           break;
 324         case RANK_MEAN:
 325           rank = cc_1 + (c + 1.0)/ 2.0;
 326           break;
 327         case RANK_CONDENSE:
 328           rank = i;
 329           break;
 330         default:
 331           NOT_REACHED ();
 332         }
 333     }
 334   else
 335     {
 336       switch (cmd.ties)
 337         {
 338         case RANK_LOW:
 339           rank = cc_1;
 340           break;
 341         case RANK_HIGH:
 342           rank = cc;
 343           break;
 344         case RANK_MEAN:
 345           rank = cc_1 + c / 2.0 ;
 346           break;
 347         case RANK_CONDENSE:
 348           rank = i;
 349           break;
 350         default:
 351           NOT_REACHED ();
 352         }
 353     }
 354
 355   return rank;
 356 }
 357
 358
 359 static double
 360 rank_rfraction (double c, double cc, double cc_1,
 361                 int i, double w)
 362 {
 363   return rank_rank (c, cc, cc_1, i, w) / w ;
 364 }
 365
 366
 367 static double
 368 rank_percent (double c, double cc, double cc_1,
 369                 int i, double w)
 370 {
 371   return rank_rank (c, cc, cc_1, i, w) * 100.0 / w ;
 372 }
 373
 374
 375 static double
 376 rank_proportion (double c, double cc, double cc_1,
 377                  int i, double w)
 378 {
 379   const double r =  rank_rank (c, cc, cc_1, i, w) ;
 380
 381   double f;
 382
 383   switch ( cmd.fraction )
 384     {
 385     case RANK_BLOM:
 386       f =  (r - 3.0/8.0) / (w + 0.25);
 387       break;
 388     case RANK_RANKIT:
 389       f = (r - 0.5) / w ;
 390       break;
 391     case RANK_TUKEY:
 392       f = (r - 1.0/3.0) / (w + 1.0/3.0);
 393       break;
 394     case RANK_VW:
 395       f = r / ( w + 1.0);
 396       break;
 397     default:
 398       NOT_REACHED ();
 399     }
 400
 401
 402   return (f > 0) ? f : SYSMIS;
 403 }
 404
 405 static double
 406 rank_normal (double c, double cc, double cc_1,
 407              int i, double w)
 408 {
 409   double f = rank_proportion (c, cc, cc_1, i, w);
 410
 411   return gsl_cdf_ugaussian_Pinv (f);
 412 }
 413
 414 static double
 415 rank_ntiles (double c, double cc, double cc_1,
 416                 int i, double w)
 417 {
 418   double r = rank_rank (c, cc, cc_1, i, w);
 419
 420
 421   return ( floor (( r * k_ntiles) / ( w + 1) ) + 1);
 422 }
 423
 424 /* Expected value of the order statistics from an exponential distribution */
 425 static double
 426 ee (int j, double w_star)
 427 {
 428   int k;
 429   double sum = 0.0;
 430
 431   for (k = 1 ; k <= j; k++)
 432     sum += 1.0 / ( w_star + 1 - k );
 433
 434   return sum;
 435 }
 436
 437
 438 static double
 439 rank_savage (double c, double cc, double cc_1,
 440                 int i UNUSED, double w)
 441 {
 442   double int_part;
 443   const int i_1 = floor (cc_1);
 444   const int i_2 = floor (cc);
 445
 446   const double w_star = (modf (w, &int_part) == 0 ) ? w : floor (w) + 1;
 447
 448   const double g_1 = cc_1 - i_1;
 449   const double g_2 = cc - i_2;
 450
 451   /* The second factor is infinite, when the first is zero.
 452      Therefore, evaluate the second, only when the first is non-zero */
 453   const double expr1 =  (1 - g_1) ? (1 - g_1) * ee(i_1+1, w_star) : ( 1 - g_1);
 454   const double expr2 =  g_2 ? g_2 * ee (i_2+1, w_star) : g_2 ;
 455
 456   if ( i_1 == i_2 )
 457     return ee (i_1 + 1, w_star) - 1;
 458
 459   if ( i_1 + 1 == i_2 )
 460     return ( ( expr1 + expr2 )/c ) - 1;
 461
 462   if ( i_1 + 2 <= i_2 )
 463     {
 464       int j;
 465       double sigma = 0.0;
 466       for (j = i_1 + 2 ; j <= i_2; ++j )
 467         sigma += ee (j, w_star);
 468       return ( (expr1 + expr2 + sigma) / c) -1;
 469     }
 470
 471   NOT_REACHED();
 472 }
 473
 474
 475 /* Rank the casefile belonging to CR, starting from the current
 476    postition of CR continuing up to and including the ENDth case.
 477
 478    RS points to an array containing  the rank specifications to
 479    use. N_RANK_SPECS is the number of elements of RS.
 480
 481
 482    DEST_VAR_INDEX is the index into the rank_spec destvar element
 483    to be used for this ranking.
 484
 485    Prerequisites: 1. The casefile must be sorted according to CRITERION.
 486                   2. W is the sum of the non-missing caseweights for this
 487                   range of the casefile.
 488 */
 489 static void
 490 rank_cases (struct casereader *cr,
 491             unsigned long end,
 492             const struct dictionary *dict,
 493             const struct sort_criterion *criterion,
 494             const struct missing_values *mv,
 495             double w,
 496             const struct rank_spec *rs,
 497             int n_rank_specs,
 498             int dest_var_index,
 499             struct casefile *dest)
 500 {
 501   bool warn = true;
 502   double cc = 0.0;
 503   double cc_1;
 504   int iter = 1;
 505
 506   const int fv = criterion->fv;
 507   const int width = criterion->width;
 508
 509   while (casereader_cnum (cr) < end)
 510     {
 511       struct casereader *lookahead;
 512       const union value *this_value;
 513       struct ccase this_case, lookahead_case;
 514       double c;
 515       int i;
 516       size_t n = 0;
 517
 518       if (!casereader_read_xfer (cr, &this_case))
 519         break;
 520
 521       this_value = case_data (&this_case, fv);
 522       c = dict_get_case_weight (dict, &this_case, &warn);
 523
 524       lookahead = casereader_clone (cr);
 525       n = 0;
 526       while (casereader_cnum (lookahead) < end
 527              && casereader_read_xfer (lookahead, &lookahead_case))
 528         {
 529           const union value *lookahead_value = case_data (&lookahead_case, fv);
 530           int diff = compare_values (this_value, lookahead_value, width);
 531
 532           if (diff != 0)
 533             {
 534               /* Make sure the casefile was sorted */
 535               assert ( diff == ((criterion->dir == SRT_ASCEND) ? -1 :1));
 536
 537               case_destroy (&lookahead_case);
 538               break;
 539             }
 540
 541           c += dict_get_case_weight (dict, &lookahead_case, &warn);
 542           case_destroy (&lookahead_case);
 543           n++;
 544         }
 545       casereader_destroy (lookahead);
 546
 547       cc_1 = cc;
 548       if ( !value_is_missing (mv, this_value) )
 549         cc += c;
 550
 551       do
 552         {
 553           for (i = 0; i < n_rank_specs; ++i)
 554             {
 555               const int dest_idx = rs[i].destvars[dest_var_index]->fv;
 556
 557               if  ( value_is_missing (mv, this_value) )
 558                 case_data_rw (&this_case, dest_idx)->f = SYSMIS;
 559               else
 560                 case_data_rw (&this_case, dest_idx)->f =
 561                   rank_func[rs[i].rfunc](c, cc, cc_1, iter, w);
 562             }
 563           casefile_append_xfer (dest, &this_case);
 564         }
 565       while (n-- > 0 && casereader_read_xfer (cr, &this_case));
 566
 567       if ( !value_is_missing (mv, this_value) )
 568         iter++;
 569     }
 570
 571   /* If this isn't true, then all the results will be wrong */
 572   assert ( w == cc );
 573 }
 574
 575 static bool
 576 same_group (const struct ccase *a, const struct ccase *b,
 577             const struct sort_criteria *crit)
 578 {
 579   size_t i;
 580
 581   for (i = 0; i < crit->crit_cnt - 1; i++)
 582     {
 583       struct sort_criterion *c = &crit->crits[i];
 584       if (compare_values (case_data (a, c->fv), case_data (b, c->fv),
 585                           c->width) != 0)
 586         return false;
 587     }
 588
 589   return true;
 590 }
 591
 592 static struct casefile *
 593 rank_sorted_casefile (struct casefile *cf,
 594                       const struct sort_criteria *crit,
 595                       const struct dictionary *dict,
 596                       const struct rank_spec *rs,
 597                       int n_rank_specs,
 598                       int dest_idx,
 599                       const struct missing_values *mv)
 600 {
 601   struct casefile *dest = fastfile_create (casefile_get_value_cnt (cf));
 602   struct casereader *lookahead = casefile_get_reader (cf, NULL);
 603   struct casereader *pos = casereader_clone (lookahead);
 604   struct ccase group_case;
 605   bool warn = true;
 606
 607   struct sort_criterion *ultimate_crit = &crit->crits[crit->crit_cnt - 1];
 608
 609   if (casereader_read (lookahead, &group_case))
 610     {
 611       struct ccase this_case;
 612       const union value *this_value ;
 613       double w = 0.0;
 614       this_value = case_data( &group_case, ultimate_crit->fv);
 615
 616       if ( !value_is_missing(mv, this_value) )
 617         w = dict_get_case_weight (dict, &group_case, &warn);
 618
 619       while (casereader_read (lookahead, &this_case))
 620         {
 621           const union value *this_value =
 622             case_data(&this_case, ultimate_crit->fv);
 623           double c = dict_get_case_weight (dict, &this_case, &warn);
 624           if (!same_group (&group_case, &this_case, crit))
 625             {
 626               rank_cases (pos, casereader_cnum (lookahead) - 1,
 627                           dict,
 628                           ultimate_crit,
 629                           mv, w,
 630                           rs, n_rank_specs,
 631                           dest_idx, dest);
 632
 633               w = 0.0;
 634               case_destroy (&group_case);
 635               case_move (&group_case, &this_case);
 636             }
 637           if ( !value_is_missing (mv, this_value) )
 638             w += c;
 639           case_destroy (&this_case);
 640         }
 641       case_destroy (&group_case);
 642       rank_cases (pos, ULONG_MAX, dict, ultimate_crit, mv, w,
 643                   rs, n_rank_specs, dest_idx, dest);
 644     }
 645
 646   if (casefile_error (dest))
 647     {
 648       casefile_destroy (dest);
 649       dest = NULL;
 650     }
 651
 652   casefile_destroy (cf);
 653   return dest;
 654 }
 655
 656
 657 /* Transformation function to enumerate all the cases */
 658 static int
 659 create_resort_key (void *key_var_, struct ccase *cc, casenumber case_num)
 660 {
 661   struct variable *key_var = key_var_;
 662
 663   case_data_rw(cc, key_var->fv)->f = case_num;
 664
 665   return TRNS_CONTINUE;
 666 }
 667
 668
 669 /* Create and return a new variable in which to store the ranks of SRC_VAR
 670    accoring to the rank function F.
 671    VNAME is the name of the variable to be created.
 672    If VNAME is NULL, then a name will be automatically chosen.
 673  */
 674 static struct variable *
 675 create_rank_variable (struct dictionary *dict, enum RANK_FUNC f,
 676                       const struct variable *src_var,
 677                       const char *vname)
 678 {
 679   int i;
 680   struct variable *var = NULL;
 681   char name[SHORT_NAME_LEN + 1];
 682
 683   if ( vname )
 684     var = dict_create_var(dict, vname, 0);
 685
 686   if ( NULL == var )
 687     {
 688       snprintf(name, SHORT_NAME_LEN + 1, "%c%s",
 689                function_name[f][0], src_var->name);
 690
 691       var = dict_create_var(dict, name, 0);
 692     }
 693
 694   i = 1;
 695   while( NULL == var )
 696     {
 697       char func_abb[4];
 698       snprintf(func_abb, 4, "%s", function_name[f]);
 699       snprintf(name, SHORT_NAME_LEN + 1, "%s%03d", func_abb,
 700                i);
 701
 702       var = dict_create_var(dict, name, 0);
 703       if (i++ >= 999)
 704         break;
 705     }
 706
 707   i = 1;
 708   while ( NULL == var )
 709     {
 710       char func_abb[3];
 711       snprintf(func_abb, 3, "%s", function_name[f]);
 712
 713       snprintf(name, SHORT_NAME_LEN + 1,
 714                "RNK%s%02d", func_abb, i);
 715
 716       var = dict_create_var(dict, name, 0);
 717       if ( i++ >= 99 )
 718         break;
 719     }
 720
 721   if ( NULL == var )
 722     {
 723       msg(ME, _("Cannot create new rank variable.  All candidates in use."));
 724       return NULL;
 725     }
 726
 727   var->write = var->print = dest_format[f];
 728
 729   return var;
 730 }
 731
 732
 733 static void
 734 rank_cleanup(void)
 735 {
 736   int i;
 737
 738   free (group_vars);
 739   group_vars = NULL;
 740   n_group_vars = 0;
 741
 742   for (i = 0 ; i <  n_rank_specs ; ++i )
 743     {
 744       free (rank_specs[i].destvars);
 745     }
 746
 747   free (rank_specs);
 748   rank_specs = NULL;
 749   n_rank_specs = 0;
 750
 751   sort_destroy_criteria (sc);
 752   sc = NULL;
 753
 754   free (src_vars);
 755   src_vars = NULL;
 756   n_src_vars = 0;
 757 }
 758
 759 int
 760 cmd_rank (struct lexer *lexer, struct dataset *ds)
 761 {
 762   bool result;
 763   struct variable *order;
 764   size_t i;
 765   n_rank_specs = 0;
 766
 767   if ( !parse_rank (lexer, ds, &cmd, NULL) )
 768     {
 769       rank_cleanup ();
 770     return CMD_FAILURE;
 771     }
 772
 773   /* If /MISSING=INCLUDE is set, then user missing values are ignored */
 774   if (cmd.miss == RANK_INCLUDE )
 775     value_is_missing = mv_is_value_system_missing;
 776   else
 777     value_is_missing = mv_is_value_missing;
 778
 779
 780   /* Default to /RANK if no function subcommands are given */
 781   if ( !( cmd.sbc_normal  || cmd.sbc_ntiles || cmd.sbc_proportion ||
 782           cmd.sbc_rfraction || cmd.sbc_savage || cmd.sbc_n ||
 783           cmd.sbc_percent || cmd.sbc_rank ) )
 784     {
 785       assert ( n_rank_specs == 0 );
 786
 787       rank_specs = xmalloc (sizeof (*rank_specs));
 788       rank_specs[0].rfunc = RANK;
 789       rank_specs[0].destvars =
 790         xcalloc (sc->crit_cnt, sizeof (struct variable *));
 791
 792       n_rank_specs = 1;
 793     }
 794
 795   assert ( sc->crit_cnt == n_src_vars);
 796
 797   /* Create variables for all rank destinations which haven't
 798      already been created with INTO.
 799      Add labels to all the destination variables.
 800   */
 801   for (i = 0 ; i <  n_rank_specs ; ++i )
 802     {
 803       int v;
 804       for ( v = 0 ; v < n_src_vars ;  v ++ )
 805         {
 806           if ( rank_specs[i].destvars[v] == NULL )
 807             {
 808               rank_specs[i].destvars[v] =
 809                 create_rank_variable (dataset_dict(ds), rank_specs[i].rfunc, src_vars[v], NULL);
 810             }
 811
 812           create_var_label ( rank_specs[i].destvars[v],
 813                              src_vars[v],
 814                              rank_specs[i].rfunc);
 815         }
 816     }
 817
 818   if ( cmd.print == RANK_YES )
 819     {
 820       int v;
 821
 822       tab_output_text (0, _("Variables Created By RANK"));
 823       tab_output_text (0, "\n");
 824
 825       for (i = 0 ; i <  n_rank_specs ; ++i )
 826         {
 827           for ( v = 0 ; v < n_src_vars ;  v ++ )
 828             {
 829               if ( n_group_vars > 0 )
 830                 {
 831                   struct string varlist;
 832                   int g;
 833
 834                   ds_init_empty (&varlist);
 835                   for ( g = 0 ; g < n_group_vars ; ++g )
 836                     {
 837                       ds_put_cstr (&varlist, group_vars[g]->name);
 838
 839                       if ( g < n_group_vars - 1)
 840                         ds_put_cstr (&varlist, " ");
 841                     }
 842
 843                   if ( rank_specs[i].rfunc == NORMAL ||
 844                        rank_specs[i].rfunc == PROPORTION )
 845                     tab_output_text (TAT_PRINTF,
 846                                      _("%s into %s(%s of %s using %s BY %s)"),
 847                                      src_vars[v]->name,
 848                                      rank_specs[i].destvars[v]->name,
 849                                      function_name[rank_specs[i].rfunc],
 850                                      src_vars[v]->name,
 851                                      fraction_name(),
 852                                      ds_cstr (&varlist)
 853                                      );
 854
 855                   else
 856                     tab_output_text (TAT_PRINTF,
 857                                      _("%s into %s(%s of %s BY %s)"),
 858                                      src_vars[v]->name,
 859                                      rank_specs[i].destvars[v]->name,
 860                                      function_name[rank_specs[i].rfunc],
 861                                      src_vars[v]->name,
 862                                      ds_cstr (&varlist)
 863                                      );
 864                   ds_destroy (&varlist);
 865                 }
 866               else
 867                 {
 868                   if ( rank_specs[i].rfunc == NORMAL ||
 869                        rank_specs[i].rfunc == PROPORTION )
 870                     tab_output_text (TAT_PRINTF,
 871                                      _("%s into %s(%s of %s using %s)"),
 872                                      src_vars[v]->name,
 873                                      rank_specs[i].destvars[v]->name,
 874                                      function_name[rank_specs[i].rfunc],
 875                                      src_vars[v]->name,
 876                                      fraction_name()
 877                                      );
 878
 879                   else
 880                     tab_output_text (TAT_PRINTF,
 881                                      _("%s into %s(%s of %s)"),
 882                                      src_vars[v]->name,
 883                                      rank_specs[i].destvars[v]->name,
 884                                      function_name[rank_specs[i].rfunc],
 885                                      src_vars[v]->name
 886                                      );
 887                 }
 888             }
 889         }
 890     }
 891
 892   if ( cmd.sbc_fraction &&
 893        ( ! cmd.sbc_normal && ! cmd.sbc_proportion) )
 894     msg(MW, _("FRACTION has been specified, but NORMAL and PROPORTION rank functions have not been requested.  The FRACTION subcommand will be ignored.") );
 895
 896   /* Add a variable which we can sort by to get back the original
 897      order */
 898   order = dict_create_var_assert (dataset_dict (ds), "$ORDER_", 0);
 899
 900   add_transformation (ds, create_resort_key, 0, order);
 901
 902   /* Do the ranking */
 903   result = rank_cmd (ds, sc, rank_specs, n_rank_specs);
 904
 905   /* Put the active file back in its original order */
 906   {
 907     struct sort_criteria criteria;
 908     struct sort_criterion restore_criterion ;
 909     restore_criterion.fv = order->fv;
 910     restore_criterion.width = 0;
 911     restore_criterion.dir = SRT_ASCEND;
 912
 913     criteria.crits = &restore_criterion;
 914     criteria.crit_cnt = 1;
 915
 916     sort_active_file_in_place (ds, &criteria);
 917 }
 918
 919   /* ... and we don't need our sort key anymore. So delete it */
 920   dict_delete_var (dataset_dict (ds), order);
 921
 922   rank_cleanup();
 923
 924   return (result ? CMD_SUCCESS : CMD_CASCADING_FAILURE);
 925 }
 926
 927
 928 /* Parser for the variables sub command
 929    Returns 1 on success */
 930 static int
 931 rank_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd UNUSED, void *aux UNUSED)
 932 {
 933   static const int terminators[2] = {T_BY, 0};
 934
 935   lex_match (lexer, '=');
 936
 937   if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL)
 938       && lex_token (lexer) != T_ALL)
 939       return 2;
 940
 941   sc = sort_parse_criteria (lexer, dataset_dict (ds),
 942                             &src_vars, &n_src_vars, 0, terminators);
 943
 944   if ( lex_match (lexer, T_BY)  )
 945     {
 946       if ((lex_token (lexer) != T_ID || dict_lookup_var (dataset_dict (ds), lex_tokid (lexer)) == NULL))
 947         {
 948           return 2;
 949         }
 950
 951       if (!parse_variables (lexer, dataset_dict (ds),
 952                             &group_vars, &n_group_vars,
 953                             PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
 954         {
 955           free (group_vars);
 956           return 0;
 957         }
 958     }
 959
 960   return 1;
 961 }
 962
 963
 964 /* Parse the [/rank INTO var1 var2 ... varN ] clause */
 965 static int
 966 parse_rank_function (struct lexer *lexer, struct dictionary *dict, struct cmd_rank *cmd UNUSED, enum RANK_FUNC f)
 967 {
 968   int var_count = 0;
 969
 970   n_rank_specs++;
 971   rank_specs = xnrealloc(rank_specs, n_rank_specs, sizeof *rank_specs);
 972   rank_specs[n_rank_specs - 1].rfunc = f;
 973   rank_specs[n_rank_specs - 1].destvars = NULL;
 974
 975   rank_specs[n_rank_specs - 1].destvars =
 976             xcalloc (sc->crit_cnt, sizeof (struct variable *));
 977
 978   if (lex_match_id (lexer, "INTO"))
 979     {
 980       struct variable *destvar;
 981
 982       while( lex_token (lexer) == T_ID )
 983         {
 984
 985           if ( dict_lookup_var (dict, lex_tokid (lexer)) != NULL )
 986             {
 987               msg(SE, _("Variable %s already exists."), lex_tokid (lexer));
 988               return 0;
 989             }
 990           if ( var_count >= sc->crit_cnt )
 991             {
 992               msg(SE, _("Too many variables in INTO clause."));
 993               return 0;
 994             }
 995
 996           destvar = create_rank_variable (dict, f, src_vars[var_count], lex_tokid (lexer));
 997           rank_specs[n_rank_specs - 1].destvars[var_count] = destvar ;
 998
 999           lex_get (lexer);
1000           ++var_count;
1001         }
1002     }
1003
1004   return 1;
1005 }
1006
1007
1008 static int
1009 rank_custom_rank (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED )
1010 {
1011   struct dictionary *dict = dataset_dict (ds);
1012
1013   return parse_rank_function (lexer, dict, cmd, RANK);
1014 }
1015
1016 static int
1017 rank_custom_normal (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED )
1018 {
1019   struct  dictionary *dict = dataset_dict (ds);
1020
1021   return parse_rank_function (lexer, dict, cmd, NORMAL);
1022 }
1023
1024 static int
1025 rank_custom_percent (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED )
1026 {
1027   struct dictionary *dict = dataset_dict (ds);
1028
1029   return parse_rank_function (lexer, dict, cmd, PERCENT);
1030 }
1031
1032 static int
1033 rank_custom_rfraction (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED )
1034 {
1035   struct dictionary *dict = dataset_dict (ds);
1036
1037   return parse_rank_function (lexer, dict, cmd, RFRACTION);
1038 }
1039
1040 static int
1041 rank_custom_proportion (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED )
1042 {
1043   struct dictionary *dict = dataset_dict (ds);
1044
1045   return parse_rank_function (lexer, dict, cmd, PROPORTION);
1046 }
1047
1048 static int
1049 rank_custom_n (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED )
1050 {
1051   struct dictionary *dict = dataset_dict (ds);
1052
1053   return parse_rank_function (lexer, dict, cmd, N);
1054 }
1055
1056 static int
1057 rank_custom_savage (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED )
1058 {
1059   struct dictionary *dict = dataset_dict (ds);
1060
1061   return parse_rank_function (lexer, dict, cmd, SAVAGE);
1062 }
1063
1064
1065 static int
1066 rank_custom_ntiles (struct lexer *lexer, struct dataset *ds, struct cmd_rank *cmd, void *aux UNUSED )
1067 {
1068   struct dictionary *dict = dataset_dict (ds);
1069
1070   if ( lex_force_match (lexer, '(') )
1071     {
1072       if ( lex_force_int (lexer) )
1073         {
1074           k_ntiles = lex_integer (lexer);
1075           lex_get (lexer);
1076           lex_force_match (lexer, ')');
1077         }
1078       else
1079         return 0;
1080     }
1081   else
1082     return 0;
1083
1084   return parse_rank_function (lexer, dict, cmd, NTILES);
1085 }