src/language/data-io/get.c

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@gnu.org>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  18    02110-1301, USA. */
  19
  20 #include <config.h>
  21
  22 #include <stdlib.h>
  23
  24 #include <data/any-reader.h>
  25 #include <data/any-writer.h>
  26 #include <data/case-sink.h>
  27 #include <data/case-source.h>
  28 #include <data/case.h>
  29 #include <data/casefile.h>
  30 #include <data/fastfile.h>
  31 #include <data/dictionary.h>
  32 #include <data/por-file-writer.h>
  33 #include <data/procedure.h>
  34 #include <data/settings.h>
  35 #include <data/storage-stream.h>
  36 #include <data/sys-file-writer.h>
  37 #include <data/transformations.h>
  38 #include <data/value-labels.h>
  39 #include <data/variable.h>
  40 #include <language/command.h>
  41 #include <language/data-io/file-handle.h>
  42 #include <language/lexer/lexer.h>
  43 #include <language/lexer/variable-parser.h>
  44 #include <libpspp/alloc.h>
  45 #include <libpspp/assertion.h>
  46 #include <libpspp/compiler.h>
  47 #include <libpspp/hash.h>
  48 #include <libpspp/message.h>
  49 #include <libpspp/message.h>
  50 #include <libpspp/misc.h>
  51 #include <libpspp/str.h>
  52
  53 #include "gettext.h"
  54 #define _(msgid) gettext (msgid)
  55
  56 /* Rearranging and reducing a dictionary. */
  57 static void start_case_map (struct dictionary *);
  58 static struct case_map *finish_case_map (struct dictionary *);
  59 static void map_case (const struct case_map *,
  60                       const struct ccase *, struct ccase *);
  61 static void destroy_case_map (struct case_map *);
  62
  63 static bool parse_dict_trim (struct dictionary *);
  64 \f
  65 /* Reading system and portable files. */
  66
  67 /* Type of command. */
  68 enum reader_command
  69   {
  70     GET_CMD,
  71     IMPORT_CMD
  72   };
  73
  74 /* Case reader input program. */
  75 struct case_reader_pgm
  76   {
  77     struct any_reader *reader;  /* File reader. */
  78     struct case_map *map;       /* Map from file dict to active file dict. */
  79     struct ccase bounce;        /* Bounce buffer. */
  80   };
  81
  82 static const struct case_source_class case_reader_source_class;
  83
  84 static void case_reader_pgm_free (struct case_reader_pgm *);
  85
  86 /* Parses a GET or IMPORT command. */
  87 static int
  88 parse_read_command (enum reader_command type)
  89 {
  90   struct case_reader_pgm *pgm = NULL;
  91   struct file_handle *fh = NULL;
  92   struct dictionary *dict = NULL;
  93
  94   for (;;)
  95     {
  96       lex_match ('/');
  97
  98       if (lex_match_id ("FILE") || token == T_STRING)
  99         {
 100           lex_match ('=');
 101
 102           fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
 103           if (fh == NULL)
 104             goto error;
 105         }
 106       else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
 107         {
 108           lex_match ('=');
 109
 110           if (lex_match_id ("COMM"))
 111             type = PFM_COMM;
 112           else if (lex_match_id ("TAPE"))
 113             type = PFM_TAPE;
 114           else
 115             {
 116               lex_error (_("expecting COMM or TAPE"));
 117               goto error;
 118             }
 119         }
 120       else
 121         break;
 122     }
 123
 124   if (fh == NULL)
 125     {
 126       lex_sbc_missing ("FILE");
 127       goto error;
 128     }
 129
 130   discard_variables (current_dataset);
 131
 132   pgm = xmalloc (sizeof *pgm);
 133   pgm->reader = any_reader_open (fh, &dict);
 134   pgm->map = NULL;
 135   case_nullify (&pgm->bounce);
 136   if (pgm->reader == NULL)
 137     goto error;
 138
 139   case_create (&pgm->bounce, dict_get_next_value_idx (dict));
 140
 141   start_case_map (dict);
 142
 143   while (token != '.')
 144     {
 145       lex_match ('/');
 146       if (!parse_dict_trim (dict))
 147         goto error;
 148     }
 149
 150   pgm->map = finish_case_map (dict);
 151
 152   dict_destroy (dataset_dict (current_dataset));
 153   dataset_set_dict (current_dataset, dict);
 154
 155   proc_set_source (current_dataset,
 156                    create_case_source (&case_reader_source_class, pgm));
 157
 158   return CMD_SUCCESS;
 159
 160  error:
 161   case_reader_pgm_free (pgm);
 162   if (dict != NULL)
 163     dict_destroy (dict);
 164   return CMD_CASCADING_FAILURE;
 165 }
 166
 167 /* Frees a struct case_reader_pgm. */
 168 static void
 169 case_reader_pgm_free (struct case_reader_pgm *pgm)
 170 {
 171   if (pgm != NULL)
 172     {
 173       any_reader_close (pgm->reader);
 174       destroy_case_map (pgm->map);
 175       case_destroy (&pgm->bounce);
 176       free (pgm);
 177     }
 178 }
 179
 180 /* Clears internal state related to case reader input procedure. */
 181 static void
 182 case_reader_source_destroy (struct case_source *source)
 183 {
 184   struct case_reader_pgm *pgm = source->aux;
 185   case_reader_pgm_free (pgm);
 186 }
 187
 188 /* Reads all the cases from the data file into C and passes them
 189    to WRITE_CASE one by one, passing WC_DATA.
 190    Returns true if successful, false if an I/O error occurred. */
 191 static bool
 192 case_reader_source_read (struct case_source *source,
 193                          struct ccase *c,
 194                          write_case_func *write_case, write_case_data wc_data)
 195 {
 196   struct case_reader_pgm *pgm = source->aux;
 197   bool ok = true;
 198
 199   do
 200     {
 201       bool got_case;
 202       if (pgm->map == NULL)
 203         got_case = any_reader_read (pgm->reader, c);
 204       else
 205         {
 206           got_case = any_reader_read (pgm->reader, &pgm->bounce);
 207           if (got_case)
 208             map_case (pgm->map, &pgm->bounce, c);
 209         }
 210       if (!got_case)
 211         break;
 212
 213       ok = write_case (wc_data);
 214     }
 215   while (ok);
 216
 217   return ok && !any_reader_error (pgm->reader);
 218 }
 219
 220 static const struct case_source_class case_reader_source_class =
 221   {
 222     "case reader",
 223     NULL,
 224     case_reader_source_read,
 225     case_reader_source_destroy,
 226   };
 227 \f
 228 /* GET. */
 229 int
 230 cmd_get (void)
 231 {
 232   return parse_read_command (GET_CMD);
 233 }
 234
 235 /* IMPORT. */
 236 int
 237 cmd_import (void)
 238 {
 239   return parse_read_command (IMPORT_CMD);
 240 }
 241 \f
 242 /* Writing system and portable files. */
 243
 244 /* Type of output file. */
 245 enum writer_type
 246   {
 247     SYSFILE_WRITER,     /* System file. */
 248     PORFILE_WRITER      /* Portable file. */
 249   };
 250
 251 /* Type of a command. */
 252 enum command_type
 253   {
 254     XFORM_CMD,          /* Transformation. */
 255     PROC_CMD            /* Procedure. */
 256   };
 257
 258 /* File writer plus a case map. */
 259 struct case_writer
 260   {
 261     struct any_writer *writer;  /* File writer. */
 262     struct case_map *map;       /* Map to output file dictionary
 263                                    (null pointer for identity mapping). */
 264     struct ccase bounce;        /* Bounce buffer for mapping (if needed). */
 265   };
 266
 267 /* Destroys AW. */
 268 static bool
 269 case_writer_destroy (struct case_writer *aw)
 270 {
 271   bool ok = true;
 272   if (aw != NULL)
 273     {
 274       ok = any_writer_close (aw->writer);
 275       destroy_case_map (aw->map);
 276       case_destroy (&aw->bounce);
 277       free (aw);
 278     }
 279   return ok;
 280 }
 281
 282 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
 283    WRITER_TYPE identifies the type of file to write,
 284    and COMMAND_TYPE identifies the type of command.
 285
 286    On success, returns a writer.
 287    For procedures only, sets *RETAIN_UNSELECTED to true if cases
 288    that would otherwise be excluded by FILTER or USE should be
 289    included.
 290
 291    On failure, returns a null pointer. */
 292 static struct case_writer *
 293 parse_write_command (enum writer_type writer_type,
 294                      enum command_type command_type,
 295                      bool *retain_unselected)
 296 {
 297   /* Common data. */
 298   struct file_handle *handle; /* Output file. */
 299   struct dictionary *dict;    /* Dictionary for output file. */
 300   struct case_writer *aw;      /* Writer. */
 301
 302   /* Common options. */
 303   bool print_map;             /* Print map?  TODO. */
 304   bool print_short_names;     /* Print long-to-short name map.  TODO. */
 305   struct sfm_write_options sysfile_opts;
 306   struct pfm_write_options porfile_opts;
 307
 308   assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
 309   assert (command_type == XFORM_CMD || command_type == PROC_CMD);
 310   assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
 311
 312   if (command_type == PROC_CMD)
 313     *retain_unselected = true;
 314
 315   handle = NULL;
 316   dict = dict_clone (dataset_dict (current_dataset));
 317   aw = xmalloc (sizeof *aw);
 318   aw->writer = NULL;
 319   aw->map = NULL;
 320   case_nullify (&aw->bounce);
 321   print_map = false;
 322   print_short_names = false;
 323   sysfile_opts = sfm_writer_default_options ();
 324   porfile_opts = pfm_writer_default_options ();
 325
 326   start_case_map (dict);
 327   dict_delete_scratch_vars (dict);
 328
 329   lex_match ('/');
 330   for (;;)
 331     {
 332       if (lex_match_id ("OUTFILE"))
 333         {
 334           if (handle != NULL)
 335             {
 336               lex_sbc_only_once ("OUTFILE");
 337               goto error;
 338             }
 339
 340           lex_match ('=');
 341
 342           handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
 343           if (handle == NULL)
 344             goto error;
 345         }
 346       else if (lex_match_id ("NAMES"))
 347         print_short_names = true;
 348       else if (lex_match_id ("PERMISSIONS"))
 349         {
 350           bool cw;
 351
 352           lex_match ('=');
 353           if (lex_match_id ("READONLY"))
 354             cw = false;
 355           else if (lex_match_id ("WRITEABLE"))
 356             cw = true;
 357           else
 358             {
 359               lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
 360               goto error;
 361             }
 362           sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
 363         }
 364       else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
 365         {
 366           lex_match ('=');
 367           if (lex_match_id ("RETAIN"))
 368             *retain_unselected = true;
 369           else if (lex_match_id ("DELETE"))
 370             *retain_unselected = false;
 371           else
 372             {
 373               lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
 374               goto error;
 375             }
 376         }
 377       else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
 378         sysfile_opts.compress = true;
 379       else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
 380         sysfile_opts.compress = false;
 381       else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
 382         {
 383           lex_match ('=');
 384           if (!lex_force_int ())
 385             goto error;
 386           sysfile_opts.version = lex_integer ();
 387           lex_get ();
 388         }
 389       else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
 390         {
 391           lex_match ('=');
 392           if (lex_match_id ("COMMUNICATIONS"))
 393             porfile_opts.type = PFM_COMM;
 394           else if (lex_match_id ("TAPE"))
 395             porfile_opts.type = PFM_TAPE;
 396           else
 397             {
 398               lex_error (_("expecting %s or %s"), "COMM", "TAPE");
 399               goto error;
 400             }
 401         }
 402       else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
 403         {
 404           lex_match ('=');
 405           if (!lex_force_int ())
 406             goto error;
 407           porfile_opts.digits = lex_integer ();
 408           lex_get ();
 409         }
 410       else if (!parse_dict_trim (dict))
 411         goto error;
 412
 413       if (!lex_match ('/'))
 414         break;
 415     }
 416   if (lex_end_of_command () != CMD_SUCCESS)
 417     goto error;
 418
 419   if (handle == NULL)
 420     {
 421       lex_sbc_missing ("OUTFILE");
 422       goto error;
 423     }
 424
 425   dict_compact_values (dict);
 426   aw->map = finish_case_map (dict);
 427   if (aw->map != NULL)
 428     case_create (&aw->bounce, dict_get_next_value_idx (dict));
 429
 430   if (fh_get_referent (handle) == FH_REF_FILE)
 431     {
 432       switch (writer_type)
 433         {
 434         case SYSFILE_WRITER:
 435           aw->writer = any_writer_from_sfm_writer (
 436             sfm_open_writer (handle, dict, sysfile_opts));
 437           break;
 438         case PORFILE_WRITER:
 439           aw->writer = any_writer_from_pfm_writer (
 440             pfm_open_writer (handle, dict, porfile_opts));
 441           break;
 442         }
 443     }
 444   else
 445     aw->writer = any_writer_open (handle, dict);
 446   if (aw->writer == NULL)
 447     goto error;
 448   dict_destroy (dict);
 449
 450   return aw;
 451
 452  error:
 453   case_writer_destroy (aw);
 454   dict_destroy (dict);
 455   return NULL;
 456 }
 457
 458 /* Writes case C to writer AW. */
 459 static bool
 460 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
 461 {
 462   if (aw->map != NULL)
 463     {
 464       map_case (aw->map, c, &aw->bounce);
 465       c = &aw->bounce;
 466     }
 467   return any_writer_write (aw->writer, c);
 468 }
 469 \f
 470 /* SAVE and EXPORT. */
 471
 472 static bool output_proc (const struct ccase *, void *);
 473
 474 /* Parses and performs the SAVE or EXPORT procedure. */
 475 static int
 476 parse_output_proc (enum writer_type writer_type)
 477 {
 478   bool retain_unselected;
 479   struct variable *saved_filter_variable;
 480   struct case_writer *aw;
 481   bool ok;
 482
 483   aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
 484   if (aw == NULL)
 485     return CMD_CASCADING_FAILURE;
 486
 487   saved_filter_variable = dict_get_filter (dataset_dict (current_dataset));
 488   if (retain_unselected)
 489     dict_set_filter (dataset_dict (current_dataset), NULL);
 490   ok = procedure (current_dataset,output_proc, aw);
 491   dict_set_filter (dataset_dict (current_dataset), saved_filter_variable);
 492
 493   case_writer_destroy (aw);
 494   return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
 495 }
 496
 497 /* Writes case C to file. */
 498 static bool
 499 output_proc (const struct ccase *c, void *aw_)
 500 {
 501   struct case_writer *aw = aw_;
 502   return case_writer_write_case (aw, c);
 503 }
 504
 505 int
 506 cmd_save (void)
 507 {
 508   return parse_output_proc (SYSFILE_WRITER);
 509 }
 510
 511 int
 512 cmd_export (void)
 513 {
 514   return parse_output_proc (PORFILE_WRITER);
 515 }
 516 \f
 517 /* XSAVE and XEXPORT. */
 518
 519 /* Transformation. */
 520 struct output_trns
 521   {
 522     struct case_writer *aw;      /* Writer. */
 523   };
 524
 525 static trns_proc_func output_trns_proc;
 526 static trns_free_func output_trns_free;
 527
 528 /* Parses the XSAVE or XEXPORT transformation command. */
 529 static int
 530 parse_output_trns (enum writer_type writer_type)
 531 {
 532   struct output_trns *t = xmalloc (sizeof *t);
 533   t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
 534   if (t->aw == NULL)
 535     {
 536       free (t);
 537       return CMD_CASCADING_FAILURE;
 538     }
 539
 540   add_transformation (current_dataset, output_trns_proc, output_trns_free, t);
 541   return CMD_SUCCESS;
 542 }
 543
 544 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
 545 static int
 546 output_trns_proc (void *trns_, struct ccase *c, casenum_t case_num UNUSED)
 547 {
 548   struct output_trns *t = trns_;
 549   case_writer_write_case (t->aw, c);
 550   return TRNS_CONTINUE;
 551 }
 552
 553 /* Frees an XSAVE or XEXPORT transformation.
 554    Returns true if successful, false if an I/O error occurred. */
 555 static bool
 556 output_trns_free (void *trns_)
 557 {
 558   struct output_trns *t = trns_;
 559   bool ok = true;
 560
 561   if (t != NULL)
 562     {
 563       ok = case_writer_destroy (t->aw);
 564       free (t);
 565     }
 566   return ok;
 567 }
 568
 569 /* XSAVE command. */
 570 int
 571 cmd_xsave (void)
 572 {
 573   return parse_output_trns (SYSFILE_WRITER);
 574 }
 575
 576 /* XEXPORT command. */
 577 int
 578 cmd_xexport (void)
 579 {
 580   return parse_output_trns (PORFILE_WRITER);
 581 }
 582 \f
 583 static bool rename_variables (struct dictionary *dict);
 584 static bool drop_variables (struct dictionary *dict);
 585 static bool keep_variables (struct dictionary *dict);
 586
 587 /* Commands that read and write system files share a great deal
 588    of common syntactic structure for rearranging and dropping
 589    variables.  This function parses this syntax and modifies DICT
 590    appropriately.  Returns true on success, false on failure. */
 591 static bool
 592 parse_dict_trim (struct dictionary *dict)
 593 {
 594   if (lex_match_id ("MAP"))
 595     {
 596       /* FIXME. */
 597       return true;
 598     }
 599   else if (lex_match_id ("DROP"))
 600     return drop_variables (dict);
 601   else if (lex_match_id ("KEEP"))
 602     return keep_variables (dict);
 603   else if (lex_match_id ("RENAME"))
 604     return rename_variables (dict);
 605   else
 606     {
 607       lex_error (_("expecting a valid subcommand"));
 608       return false;
 609     }
 610 }
 611
 612 /* Parses and performs the RENAME subcommand of GET and SAVE. */
 613 static bool
 614 rename_variables (struct dictionary *dict)
 615 {
 616   size_t i;
 617
 618   int success = 0;
 619
 620   struct variable **v;
 621   char **new_names;
 622   size_t nv, nn;
 623   char *err_name;
 624
 625   int group;
 626
 627   lex_match ('=');
 628   if (token != '(')
 629     {
 630       struct variable *v;
 631
 632       v = parse_dict_variable (dict);
 633       if (v == NULL)
 634         return 0;
 635       if (!lex_force_match ('=')
 636           || !lex_force_id ())
 637         return 0;
 638       if (dict_lookup_var (dict, tokid) != NULL)
 639         {
 640           msg (SE, _("Cannot rename %s as %s because there already exists "
 641                      "a variable named %s.  To rename variables with "
 642                      "overlapping names, use a single RENAME subcommand "
 643                      "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
 644                      "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
 645           return 0;
 646         }
 647
 648       dict_rename_var (dict, v, tokid);
 649       lex_get ();
 650       return 1;
 651     }
 652
 653   nv = nn = 0;
 654   v = NULL;
 655   new_names = 0;
 656   group = 1;
 657   while (lex_match ('('))
 658     {
 659       size_t old_nv = nv;
 660
 661       if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
 662         goto done;
 663       if (!lex_match ('='))
 664         {
 665           msg (SE, _("`=' expected after variable list."));
 666           goto done;
 667         }
 668       if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
 669         goto done;
 670       if (nn != nv)
 671         {
 672           msg (SE, _("Number of variables on left side of `=' (%d) does not "
 673                      "match number of variables on right side (%d), in "
 674                      "parenthesized group %d of RENAME subcommand."),
 675                (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
 676           goto done;
 677         }
 678       if (!lex_force_match (')'))
 679         goto done;
 680       group++;
 681     }
 682
 683   if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
 684     {
 685       msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
 686       goto done;
 687     }
 688   success = 1;
 689
 690  done:
 691   for (i = 0; i < nn; i++)
 692     free (new_names[i]);
 693   free (new_names);
 694   free (v);
 695
 696   return success;
 697 }
 698
 699 /* Parses and performs the DROP subcommand of GET and SAVE.
 700    Returns true if successful, false on failure.*/
 701 static bool
 702 drop_variables (struct dictionary *dict)
 703 {
 704   struct variable **v;
 705   size_t nv;
 706
 707   lex_match ('=');
 708   if (!parse_variables (dict, &v, &nv, PV_NONE))
 709     return false;
 710   dict_delete_vars (dict, v, nv);
 711   free (v);
 712
 713   if (dict_get_var_cnt (dict) == 0)
 714     {
 715       msg (SE, _("Cannot DROP all variables from dictionary."));
 716       return false;
 717     }
 718   return true;
 719 }
 720
 721 /* Parses and performs the KEEP subcommand of GET and SAVE.
 722    Returns true if successful, false on failure.*/
 723 static bool
 724 keep_variables (struct dictionary *dict)
 725 {
 726   struct variable **v;
 727   size_t nv;
 728   size_t i;
 729
 730   lex_match ('=');
 731   if (!parse_variables (dict, &v, &nv, PV_NONE))
 732     return false;
 733
 734   /* Move the specified variables to the beginning. */
 735   dict_reorder_vars (dict, v, nv);
 736
 737   /* Delete the remaining variables. */
 738   v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
 739   for (i = nv; i < dict_get_var_cnt (dict); i++)
 740     v[i - nv] = dict_get_var (dict, i);
 741   dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
 742   free (v);
 743
 744   return true;
 745 }
 746 \f
 747 /* MATCH FILES. */
 748
 749 /* File types. */
 750 enum
 751   {
 752     MTF_FILE,                   /* Specified on FILE= subcommand. */
 753     MTF_TABLE                   /* Specified on TABLE= subcommand. */
 754   };
 755
 756 /* One of the files on MATCH FILES. */
 757 struct mtf_file
 758   {
 759     struct mtf_file *next, *prev; /* Next, previous in the list of files. */
 760     struct mtf_file *next_min;  /* Next in the chain of minimums. */
 761
 762     int type;                   /* One of MTF_*. */
 763     struct variable **by;       /* List of BY variables for this file. */
 764     struct file_handle *handle; /* File handle. */
 765     struct any_reader *reader;  /* File reader. */
 766     struct dictionary *dict;    /* Dictionary from system file. */
 767
 768     /* IN subcommand. */
 769     char *in_name;              /* Variable name. */
 770     struct variable *in_var;    /* Variable (in master dictionary). */
 771
 772     struct ccase input;         /* Input record. */
 773   };
 774
 775 /* MATCH FILES procedure. */
 776 struct mtf_proc
 777   {
 778     struct mtf_file *head;      /* First file mentioned on FILE or TABLE. */
 779     struct mtf_file *tail;      /* Last file mentioned on FILE or TABLE. */
 780
 781     bool ok;                    /* False if I/O error occurs. */
 782
 783     size_t by_cnt;              /* Number of variables on BY subcommand. */
 784
 785     /* Names of FIRST, LAST variables. */
 786     char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
 787
 788     struct dictionary *dict;    /* Dictionary of output file. */
 789     struct casefile *output;    /* MATCH FILES output. */
 790     struct ccase mtf_case;      /* Case used for output. */
 791
 792     unsigned seq_num;           /* Have we initialized this variable? */
 793     unsigned *seq_nums;         /* Sequence numbers for each var in dict. */
 794   };
 795
 796 static bool mtf_free (struct mtf_proc *);
 797 static bool mtf_close_file (struct mtf_file *);
 798 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
 799 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
 800
 801 static bool mtf_read_nonactive_records (void *);
 802 static bool mtf_processing_finish (void *);
 803 static bool mtf_processing (const struct ccase *, void *);
 804
 805 static char *var_type_description (struct variable *);
 806
 807 static void set_master (struct variable *, struct variable *master);
 808 static struct variable *get_master (struct variable *);
 809
 810 /* Parse and execute the MATCH FILES command. */
 811 int
 812 cmd_match_files (void)
 813 {
 814   struct mtf_proc mtf;
 815   struct mtf_file *first_table = NULL;
 816   struct mtf_file *iter;
 817
 818   bool used_active_file = false;
 819   bool saw_table = false;
 820   bool saw_in = false;
 821
 822   bool ok;
 823
 824   mtf.head = mtf.tail = NULL;
 825   mtf.by_cnt = 0;
 826   mtf.first[0] = '\0';
 827   mtf.last[0] = '\0';
 828   mtf.dict = dict_create ();
 829   mtf.output = NULL;
 830   case_nullify (&mtf.mtf_case);
 831   mtf.seq_num = 0;
 832   mtf.seq_nums = NULL;
 833   dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (current_dataset)));
 834
 835   lex_match ('/');
 836   while (token == T_ID
 837          && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
 838     {
 839       struct mtf_file *file = xmalloc (sizeof *file);
 840
 841       if (lex_match_id ("FILE"))
 842         file->type = MTF_FILE;
 843       else if (lex_match_id ("TABLE"))
 844         {
 845           file->type = MTF_TABLE;
 846           saw_table = true;
 847         }
 848       else
 849         NOT_REACHED ();
 850       lex_match ('=');
 851
 852       file->by = NULL;
 853       file->handle = NULL;
 854       file->reader = NULL;
 855       file->dict = NULL;
 856       file->in_name = NULL;
 857       file->in_var = NULL;
 858       case_nullify (&file->input);
 859
 860       /* FILEs go first, then TABLEs. */
 861       if (file->type == MTF_TABLE || first_table == NULL)
 862         {
 863           file->next = NULL;
 864           file->prev = mtf.tail;
 865           if (mtf.tail)
 866             mtf.tail->next = file;
 867           mtf.tail = file;
 868           if (mtf.head == NULL)
 869             mtf.head = file;
 870           if (file->type == MTF_TABLE && first_table == NULL)
 871             first_table = file;
 872         }
 873       else
 874         {
 875           assert (file->type == MTF_FILE);
 876           file->next = first_table;
 877           file->prev = first_table->prev;
 878           if (first_table->prev)
 879             first_table->prev->next = file;
 880           else
 881             mtf.head = file;
 882           first_table->prev = file;
 883         }
 884
 885       if (lex_match ('*'))
 886         {
 887           file->handle = NULL;
 888           file->reader = NULL;
 889
 890           if (used_active_file)
 891             {
 892               msg (SE, _("The active file may not be specified more "
 893                          "than once."));
 894               goto error;
 895             }
 896           used_active_file = true;
 897
 898           if (!proc_has_source (current_dataset))
 899             {
 900               msg (SE, _("Cannot specify the active file since no active "
 901                          "file has been defined."));
 902               goto error;
 903             }
 904
 905           if (proc_make_temporary_transformations_permanent (current_dataset))
 906             msg (SE,
 907                  _("MATCH FILES may not be used after TEMPORARY when "
 908                    "the active file is an input source.  "
 909                    "Temporary transformations will be made permanent."));
 910
 911           file->dict = dataset_dict (current_dataset);
 912         }
 913       else
 914         {
 915           file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
 916           if (file->handle == NULL)
 917             goto error;
 918
 919           file->reader = any_reader_open (file->handle, &file->dict);
 920           if (file->reader == NULL)
 921             goto error;
 922
 923           case_create (&file->input, dict_get_next_value_idx (file->dict));
 924         }
 925
 926       while (lex_match ('/'))
 927         if (lex_match_id ("RENAME"))
 928           {
 929             if (!rename_variables (file->dict))
 930               goto error;
 931           }
 932         else if (lex_match_id ("IN"))
 933           {
 934             lex_match ('=');
 935             if (token != T_ID)
 936               {
 937                 lex_error (NULL);
 938                 goto error;
 939               }
 940
 941             if (file->in_name != NULL)
 942               {
 943                 msg (SE, _("Multiple IN subcommands for a single FILE or "
 944                            "TABLE."));
 945                 goto error;
 946               }
 947             file->in_name = xstrdup (tokid);
 948             lex_get ();
 949             saw_in = true;
 950           }
 951
 952       mtf_merge_dictionary (mtf.dict, file);
 953     }
 954
 955   while (token != '.')
 956     {
 957       if (lex_match (T_BY))
 958         {
 959           struct variable **by;
 960
 961           if (mtf.by_cnt)
 962             {
 963               msg (SE, _("BY may appear at most once."));
 964               goto error;
 965             }
 966
 967           lex_match ('=');
 968           if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
 969                                 PV_NO_DUPLICATE | PV_NO_SCRATCH))
 970             goto error;
 971
 972           for (iter = mtf.head; iter != NULL; iter = iter->next)
 973             {
 974               size_t i;
 975
 976               iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
 977
 978               for (i = 0; i < mtf.by_cnt; i++)
 979                 {
 980                   iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
 981                   if (iter->by[i] == NULL)
 982                     {
 983                       msg (SE, _("File %s lacks BY variable %s."),
 984                            iter->handle ? fh_get_name (iter->handle) : "*",
 985                            by[i]->name);
 986                       free (by);
 987                       goto error;
 988                     }
 989                 }
 990             }
 991           free (by);
 992         }
 993       else if (lex_match_id ("FIRST"))
 994         {
 995           if (mtf.first[0] != '\0')
 996             {
 997               msg (SE, _("FIRST may appear at most once."));
 998               goto error;
 999             }
1000
1001           lex_match ('=');
1002           if (!lex_force_id ())
1003             goto error;
1004           strcpy (mtf.first, tokid);
1005           lex_get ();
1006         }
1007       else if (lex_match_id ("LAST"))
1008         {
1009           if (mtf.last[0] != '\0')
1010             {
1011               msg (SE, _("LAST may appear at most once."));
1012               goto error;
1013             }
1014
1015           lex_match ('=');
1016           if (!lex_force_id ())
1017             goto error;
1018           strcpy (mtf.last, tokid);
1019           lex_get ();
1020         }
1021       else if (lex_match_id ("MAP"))
1022         {
1023           /* FIXME. */
1024         }
1025       else if (lex_match_id ("DROP"))
1026         {
1027           if (!drop_variables (mtf.dict))
1028             goto error;
1029         }
1030       else if (lex_match_id ("KEEP"))
1031         {
1032           if (!keep_variables (mtf.dict))
1033             goto error;
1034         }
1035       else
1036         {
1037           lex_error (NULL);
1038           goto error;
1039         }
1040
1041       if (!lex_match ('/') && token != '.')
1042         {
1043           lex_end_of_command ();
1044           goto error;
1045         }
1046     }
1047
1048   if (mtf.by_cnt == 0)
1049     {
1050       if (saw_table)
1051         {
1052           msg (SE, _("BY is required when TABLE is specified."));
1053           goto error;
1054         }
1055       if (saw_in)
1056         {
1057           msg (SE, _("BY is required when IN is specified."));
1058           goto error;
1059         }
1060     }
1061
1062   /* Set up mapping from each file's variables to master
1063      variables. */
1064   for (iter = mtf.head; iter != NULL; iter = iter->next)
1065     {
1066       struct dictionary *d = iter->dict;
1067       int i;
1068
1069       for (i = 0; i < dict_get_var_cnt (d); i++)
1070         {
1071           struct variable *v = dict_get_var (d, i);
1072           struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1073           if (mv != NULL)
1074             set_master (v, mv);
1075         }
1076     }
1077
1078   /* Add IN variables to master dictionary. */
1079   for (iter = mtf.head; iter != NULL; iter = iter->next)
1080     if (iter->in_name != NULL)
1081       {
1082         iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1083         if (iter->in_var == NULL)
1084           {
1085             msg (SE, _("IN variable name %s duplicates an "
1086                        "existing variable name."),
1087                  iter->in_var->name);
1088             goto error;
1089           }
1090         iter->in_var->print = iter->in_var->write
1091           = make_output_format (FMT_F, 1, 0);
1092       }
1093
1094   /* MATCH FILES performs an n-way merge on all its input files.
1095      Abstract algorithm:
1096
1097      1. Read one input record from every input FILE.
1098
1099      2. If no FILEs are left, stop.  Otherwise, proceed to step 3.
1100
1101      3. Find the FILE input record(s) that have minimum BY
1102      values.  Store all the values from these input records into
1103      the output record.
1104
1105      4. For every TABLE, read another record as long as the BY values
1106      on the TABLE's input record are less than the FILEs' BY values.
1107      If an exact match is found, store all the values from the TABLE
1108      input record into the output record.
1109
1110      5. Write the output record.
1111
1112      6. Read another record from each input file FILE and TABLE that
1113      we stored values from above.  If we come to the end of one of the
1114      input files, remove it from the list of input files.
1115
1116      7. Repeat from step 2.
1117
1118      Unfortunately, this algorithm can't be implemented in a
1119      straightforward way because there's no function to read a
1120      record from the active file.  Instead, it has to be written
1121      as a state machine.
1122
1123      FIXME: For merging large numbers of files (more than 10?) a
1124      better algorithm would use a heap for finding minimum
1125      values. */
1126
1127   if (!used_active_file)
1128     discard_variables (current_dataset);
1129
1130   dict_compact_values (mtf.dict);
1131   mtf.output = fastfile_create (dict_get_next_value_idx (mtf.dict));
1132   mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1133   case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1134
1135   if (!mtf_read_nonactive_records (&mtf))
1136     goto error;
1137
1138   if (used_active_file)
1139     {
1140       proc_set_sink (current_dataset,
1141                      create_case_sink (&null_sink_class,
1142                                        dataset_dict (current_dataset), NULL));
1143       ok = procedure (current_dataset,mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1144     }
1145   else
1146     ok = mtf_processing_finish (&mtf);
1147
1148   discard_variables (current_dataset);
1149
1150   dict_destroy (dataset_dict (current_dataset));
1151   dataset_set_dict (current_dataset, mtf.dict);
1152   mtf.dict = NULL;
1153   proc_set_source (current_dataset, storage_source_create (mtf.output));
1154   mtf.output = NULL;
1155
1156   if (!mtf_free (&mtf))
1157     ok = false;
1158   return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1159
1160  error:
1161   mtf_free (&mtf);
1162   return CMD_CASCADING_FAILURE;
1163 }
1164
1165 /* Repeats 2...7 an arbitrary number of times. */
1166 static bool
1167 mtf_processing_finish (void *mtf_)
1168 {
1169   struct mtf_proc *mtf = mtf_;
1170   struct mtf_file *iter;
1171
1172   /* Find the active file and delete it. */
1173   for (iter = mtf->head; iter; iter = iter->next)
1174     if (iter->handle == NULL)
1175       {
1176         if (!mtf_delete_file_in_place (mtf, &iter))
1177           NOT_REACHED ();
1178         break;
1179       }
1180
1181   while (mtf->head && mtf->head->type == MTF_FILE)
1182     if (!mtf_processing (NULL, mtf))
1183       return false;
1184
1185   return true;
1186 }
1187
1188 /* Return a string in a static buffer describing V's variable type and
1189    width. */
1190 static char *
1191 var_type_description (struct variable *v)
1192 {
1193   static char buf[2][32];
1194   static int x = 0;
1195   char *s;
1196
1197   x ^= 1;
1198   s = buf[x];
1199
1200   if (v->type == NUMERIC)
1201     strcpy (s, "numeric");
1202   else
1203     {
1204       assert (v->type == ALPHA);
1205       sprintf (s, "string with width %d", v->width);
1206     }
1207   return s;
1208 }
1209
1210 /* Closes FILE and frees its associated data.
1211    Returns true if successful, false if an I/O error
1212    occurred on FILE. */
1213 static bool
1214 mtf_close_file (struct mtf_file *file)
1215 {
1216   bool ok = file->reader == NULL || !any_reader_error (file->reader);
1217   free (file->by);
1218   any_reader_close (file->reader);
1219   if (file->handle != NULL)
1220     dict_destroy (file->dict);
1221   case_destroy (&file->input);
1222   free (file->in_name);
1223   free (file);
1224   return ok;
1225 }
1226
1227 /* Free all the data for the MATCH FILES procedure.
1228    Returns true if successful, false if an I/O error
1229    occurred. */
1230 static bool
1231 mtf_free (struct mtf_proc *mtf)
1232 {
1233   struct mtf_file *iter, *next;
1234   bool ok = true;
1235
1236   for (iter = mtf->head; iter; iter = next)
1237     {
1238       next = iter->next;
1239       assert (iter->dict != mtf->dict);
1240       if (!mtf_close_file (iter))
1241         ok = false;
1242     }
1243
1244   if (mtf->dict)
1245     dict_destroy (mtf->dict);
1246   case_destroy (&mtf->mtf_case);
1247   free (mtf->seq_nums);
1248
1249   return ok;
1250 }
1251
1252 /* Remove *FILE from the mtf_file chain.  Make *FILE point to the next
1253    file in the chain, or to NULL if was the last in the chain.
1254    Returns true if successful, false if an I/O error occurred. */
1255 static bool
1256 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1257 {
1258   struct mtf_file *f = *file;
1259   int i;
1260
1261   if (f->prev)
1262     f->prev->next = f->next;
1263   if (f->next)
1264     f->next->prev = f->prev;
1265   if (f == mtf->head)
1266     mtf->head = f->next;
1267   if (f == mtf->tail)
1268     mtf->tail = f->prev;
1269   *file = f->next;
1270
1271   if (f->in_var != NULL)
1272     case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1273   for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1274     {
1275       struct variable *v = dict_get_var (f->dict, i);
1276       struct variable *mv = get_master (v);
1277       if (mv != NULL)
1278         {
1279           union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1280
1281           if (v->type == NUMERIC)
1282             out->f = SYSMIS;
1283           else
1284             memset (out->s, ' ', v->width);
1285         }
1286     }
1287
1288   return mtf_close_file (f);
1289 }
1290
1291 /* Read a record from every input file except the active file.
1292    Returns true if successful, false if an I/O error occurred. */
1293 static bool
1294 mtf_read_nonactive_records (void *mtf_)
1295 {
1296   struct mtf_proc *mtf = mtf_;
1297   struct mtf_file *iter, *next;
1298   bool ok = true;
1299
1300   for (iter = mtf->head; ok && iter != NULL; iter = next)
1301     {
1302       next = iter->next;
1303       if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1304         if (!mtf_delete_file_in_place (mtf, &iter))
1305           ok = false;
1306     }
1307   return ok;
1308 }
1309
1310 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1311    if A == B, 1 if A > B. */
1312 static inline int
1313 mtf_compare_BY_values (struct mtf_proc *mtf,
1314                        struct mtf_file *a, struct mtf_file *b,
1315                        const struct ccase *c)
1316 {
1317   const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1318   const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1319   assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1320   return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1321 }
1322
1323 /* Perform one iteration of steps 3...7 above.
1324    Returns true if successful, false if an I/O error occurred. */
1325 static bool
1326 mtf_processing (const struct ccase *c, void *mtf_)
1327 {
1328   struct mtf_proc *mtf = mtf_;
1329
1330   /* Do we need another record from the active file? */
1331   bool read_active_file;
1332
1333   assert (mtf->head != NULL);
1334   if (mtf->head->type == MTF_TABLE)
1335     return true;
1336
1337   do
1338     {
1339       struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1340       struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1341       struct mtf_file *iter, *next;
1342
1343       read_active_file = false;
1344
1345       /* 3. Find the FILE input record(s) that have minimum BY
1346          values.  Store all the values from these input records into
1347          the output record. */
1348       min_head = min_tail = mtf->head;
1349       max_head = max_tail = NULL;
1350       for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1351            iter = iter->next)
1352         {
1353           int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1354           if (cmp < 0)
1355             {
1356               if (max_head)
1357                 max_tail = max_tail->next_min = iter;
1358               else
1359                 max_head = max_tail = iter;
1360             }
1361           else if (cmp == 0)
1362             min_tail = min_tail->next_min = iter;
1363           else /* cmp > 0 */
1364             {
1365               if (max_head)
1366                 {
1367                   max_tail->next_min = min_head;
1368                   max_tail = min_tail;
1369                 }
1370               else
1371                 {
1372                   max_head = min_head;
1373                   max_tail = min_tail;
1374                 }
1375               min_head = min_tail = iter;
1376             }
1377         }
1378
1379       /* 4. For every TABLE, read another record as long as the BY
1380          values on the TABLE's input record are less than the FILEs'
1381          BY values.  If an exact match is found, store all the values
1382          from the TABLE input record into the output record. */
1383       for (; iter != NULL; iter = next)
1384         {
1385           assert (iter->type == MTF_TABLE);
1386
1387           next = iter->next;
1388           for (;;)
1389             {
1390               int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1391               if (cmp < 0)
1392                 {
1393                   if (max_head)
1394                     max_tail = max_tail->next_min = iter;
1395                   else
1396                     max_head = max_tail = iter;
1397                 }
1398               else if (cmp == 0)
1399                 min_tail = min_tail->next_min = iter;
1400               else /* cmp > 0 */
1401                 {
1402                   if (iter->handle == NULL)
1403                     return true;
1404                   if (any_reader_read (iter->reader, &iter->input))
1405                     continue;
1406                   if (!mtf_delete_file_in_place (mtf, &iter))
1407                     return false;
1408                 }
1409               break;
1410             }
1411         }
1412
1413       /* Next sequence number. */
1414       mtf->seq_num++;
1415
1416       /* Store data to all the records we are using. */
1417       if (min_tail)
1418         min_tail->next_min = NULL;
1419       for (iter = min_head; iter; iter = iter->next_min)
1420         {
1421           int i;
1422
1423           for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1424             {
1425               struct variable *v = dict_get_var (iter->dict, i);
1426               struct variable *mv = get_master (v);
1427
1428               if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1429                 {
1430                   const struct ccase *record
1431                     = case_is_null (&iter->input) ? c : &iter->input;
1432                   union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1433
1434                   mtf->seq_nums[mv->index] = mtf->seq_num;
1435                   if (v->type == NUMERIC)
1436                     out->f = case_num (record, v->fv);
1437                   else
1438                     memcpy (out->s, case_str (record, v->fv), v->width);
1439                 }
1440             }
1441           if (iter->in_var != NULL)
1442             case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1443
1444           if (iter->type == MTF_FILE && iter->handle == NULL)
1445             read_active_file = true;
1446         }
1447
1448       /* Store missing values to all the records we're not
1449          using. */
1450       if (max_tail)
1451         max_tail->next_min = NULL;
1452       for (iter = max_head; iter; iter = iter->next_min)
1453         {
1454           int i;
1455
1456           for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1457             {
1458               struct variable *v = dict_get_var (iter->dict, i);
1459               struct variable *mv = get_master (v);
1460
1461               if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1462                 {
1463                   union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1464                   mtf->seq_nums[mv->index] = mtf->seq_num;
1465
1466                   if (v->type == NUMERIC)
1467                     out->f = SYSMIS;
1468                   else
1469                     memset (out->s, ' ', v->width);
1470                 }
1471             }
1472           if (iter->in_var != NULL)
1473             case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1474         }
1475
1476       /* 5. Write the output record. */
1477       casefile_append (mtf->output, &mtf->mtf_case);
1478
1479       /* 6. Read another record from each input file FILE and TABLE
1480          that we stored values from above.  If we come to the end of
1481          one of the input files, remove it from the list of input
1482          files. */
1483       for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1484         {
1485           next = iter->next_min;
1486           if (iter->reader != NULL
1487               && !any_reader_read (iter->reader, &iter->input))
1488             if (!mtf_delete_file_in_place (mtf, &iter))
1489               return false;
1490         }
1491     }
1492   while (!read_active_file
1493          && mtf->head != NULL && mtf->head->type == MTF_FILE);
1494
1495   return true;
1496 }
1497
1498 /* Merge the dictionary for file F into master dictionary M. */
1499 static int
1500 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1501 {
1502   struct dictionary *d = f->dict;
1503   const char *d_docs, *m_docs;
1504   int i;
1505
1506   if (dict_get_label (m) == NULL)
1507     dict_set_label (m, dict_get_label (d));
1508
1509   d_docs = dict_get_documents (d);
1510   m_docs = dict_get_documents (m);
1511   if (d_docs != NULL)
1512     {
1513       if (m_docs == NULL)
1514         dict_set_documents (m, d_docs);
1515       else
1516         {
1517           char *new_docs;
1518           size_t new_len;
1519
1520           new_len = strlen (m_docs) + strlen (d_docs);
1521           new_docs = xmalloc (new_len + 1);
1522           strcpy (new_docs, m_docs);
1523           strcat (new_docs, d_docs);
1524           dict_set_documents (m, new_docs);
1525           free (new_docs);
1526         }
1527     }
1528
1529   for (i = 0; i < dict_get_var_cnt (d); i++)
1530     {
1531       struct variable *dv = dict_get_var (d, i);
1532       struct variable *mv = dict_lookup_var (m, dv->name);
1533
1534       if (dict_class_from_id (dv->name) == DC_SCRATCH)
1535         continue;
1536
1537       if (mv != NULL)
1538         {
1539           if (mv->width != dv->width)
1540             {
1541               msg (SE, _("Variable %s in file %s (%s) has different "
1542                          "type or width from the same variable in "
1543                          "earlier file (%s)."),
1544                    dv->name, fh_get_name (f->handle),
1545                    var_type_description (dv), var_type_description (mv));
1546               return 0;
1547             }
1548
1549           if (dv->width == mv->width)
1550             {
1551               if (val_labs_count (dv->val_labs)
1552                   && !val_labs_count (mv->val_labs))
1553                 {
1554                   val_labs_destroy (mv->val_labs);
1555                   mv->val_labs = val_labs_copy (dv->val_labs);
1556                 }
1557               if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1558                 mv_copy (&mv->miss, &dv->miss);
1559             }
1560
1561           if (dv->label && !mv->label)
1562             mv->label = xstrdup (dv->label);
1563         }
1564       else
1565         mv = dict_clone_var_assert (m, dv, dv->name);
1566     }
1567
1568   return 1;
1569 }
1570
/* Records MASTER as the master variable of V by attaching it to
   V as auxiliary data, with no destructor.  get_master()
   retrieves it. */
static void
set_master (struct variable *v, struct variable *master)
{
  var_attach_aux (v, master, NULL);
}
1577
1578 /* Returns the master variable corresponding to V,
1579    as set with set_master(). */
1580 static struct variable *
1581 get_master (struct variable *v)
1582 {
1583   return v->aux;
1584 }
1585 \f
1586
1587 \f
/* Case map.

   A case map copies data from a case that corresponds to one
   dictionary to a case that corresponds to a second dictionary
   derived from the first by, optionally, deleting, reordering,
   or renaming variables.  (No new variables may be created.)
   */
1595
/* A case map: a table that tells, for each value in a
   destination case, which value of the source case supplies
   it. */
struct case_map
  {
    /* Number of entries in MAP. */
    size_t value_cnt;

    /* MAP[i] is the source index for destination index i, or -1
       if nothing is copied there. */
    int *map;
  };
1603
1604 /* Prepares dictionary D for producing a case map.  Afterward,
1605    the caller may delete, reorder, or rename variables within D
1606    at will before using finish_case_map() to produce the case
1607    map.
1608
1609    Uses D's aux members, which must otherwise not be in use. */
1610 static void
1611 start_case_map (struct dictionary *d)
1612 {
1613   size_t var_cnt = dict_get_var_cnt (d);
1614   size_t i;
1615
1616   for (i = 0; i < var_cnt; i++)
1617     {
1618       struct variable *v = dict_get_var (d, i);
1619       int *src_fv = xmalloc (sizeof *src_fv);
1620       *src_fv = v->fv;
1621       var_attach_aux (v, src_fv, var_dtor_free);
1622     }
1623 }
1624
1625 /* Produces a case map from dictionary D, which must have been
1626    previously prepared with start_case_map().
1627
1628    Does not retain any reference to D, and clears the aux members
1629    set up by start_case_map().
1630
1631    Returns the new case map, or a null pointer if no mapping is
1632    required (that is, no data has changed position). */
1633 static struct case_map *
1634 finish_case_map (struct dictionary *d)
1635 {
1636   struct case_map *map;
1637   size_t var_cnt = dict_get_var_cnt (d);
1638   size_t i;
1639   int identity_map;
1640
1641   map = xmalloc (sizeof *map);
1642   map->value_cnt = dict_get_next_value_idx (d);
1643   map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1644   for (i = 0; i < map->value_cnt; i++)
1645     map->map[i] = -1;
1646
1647   identity_map = 1;
1648   for (i = 0; i < var_cnt; i++)
1649     {
1650       struct variable *v = dict_get_var (d, i);
1651       int *src_fv = (int *) var_detach_aux (v);
1652       size_t idx;
1653
1654       if (v->fv != *src_fv)
1655         identity_map = 0;
1656
1657       for (idx = 0; idx < v->nv; idx++)
1658         {
1659           int src_idx = *src_fv + idx;
1660           int dst_idx = v->fv + idx;
1661
1662           assert (map->map[dst_idx] == -1);
1663           map->map[dst_idx] = src_idx;
1664         }
1665       free (src_fv);
1666     }
1667
1668   if (identity_map)
1669     {
1670       destroy_case_map (map);
1671       return NULL;
1672     }
1673
1674   while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1675     map->value_cnt--;
1676
1677   return map;
1678 }
1679
1680 /* Maps from SRC to DST, applying case map MAP. */
1681 static void
1682 map_case (const struct case_map *map,
1683           const struct ccase *src, struct ccase *dst)
1684 {
1685   size_t dst_idx;
1686
1687   assert (map != NULL);
1688   assert (src != NULL);
1689   assert (dst != NULL);
1690   assert (src != dst);
1691
1692   for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1693     {
1694       int src_idx = map->map[dst_idx];
1695       if (src_idx != -1)
1696         *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1697     }
1698 }
1699
1700 /* Destroys case map MAP. */
1701 static void
1702 destroy_case_map (struct case_map *map)
1703 {
1704   if (map != NULL)
1705     {
1706       free (map->map);
1707       free (map);
1708     }
1709 }