1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/fastfile.h>
31 #include <data/dictionary.h>
32 #include <data/por-file-writer.h>
33 #include <data/procedure.h>
34 #include <data/settings.h>
35 #include <data/storage-stream.h>
36 #include <data/sys-file-writer.h>
37 #include <data/transformations.h>
38 #include <data/value-labels.h>
39 #include <data/variable.h>
40 #include <language/command.h>
41 #include <language/data-io/file-handle.h>
42 #include <language/lexer/lexer.h>
43 #include <language/lexer/variable-parser.h>
44 #include <libpspp/alloc.h>
45 #include <libpspp/assertion.h>
46 #include <libpspp/compiler.h>
47 #include <libpspp/hash.h>
48 #include <libpspp/message.h>
49 #include <libpspp/message.h>
50 #include <libpspp/misc.h>
51 #include <libpspp/str.h>
54 #define _(msgid) gettext (msgid)
56 /* Rearranging and reducing a dictionary. */
57 static void start_case_map (struct dictionary *);
58 static struct case_map *finish_case_map (struct dictionary *);
59 static void map_case (const struct case_map *,
60 const struct ccase *, struct ccase *);
61 static void destroy_case_map (struct case_map *);
63 static bool parse_dict_trim (struct dictionary *);
65 /* Reading system and portable files. */
67 /* Type of command. */
74 /* Case reader input program. */
75 struct case_reader_pgm
77 struct any_reader *reader; /* File reader. */
78 struct case_map *map; /* Map from file dict to active file dict. */
79 struct ccase bounce; /* Bounce buffer. */
82 static const struct case_source_class case_reader_source_class;
84 static void case_reader_pgm_free (struct case_reader_pgm *);
86 /* Parses a GET or IMPORT command. */
88 parse_read_command (enum reader_command type)
90 struct case_reader_pgm *pgm = NULL;
91 struct file_handle *fh = NULL;
92 struct dictionary *dict = NULL;
98 if (lex_match_id ("FILE") || token == T_STRING)
102 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
106 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
110 if (lex_match_id ("COMM"))
112 else if (lex_match_id ("TAPE"))
116 lex_error (_("expecting COMM or TAPE"));
126 lex_sbc_missing ("FILE");
130 discard_variables ();
132 pgm = xmalloc (sizeof *pgm);
133 pgm->reader = any_reader_open (fh, &dict);
135 case_nullify (&pgm->bounce);
136 if (pgm->reader == NULL)
139 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
141 start_case_map (dict);
146 if (!parse_dict_trim (dict))
150 pgm->map = finish_case_map (dict);
152 dict_destroy (default_dict);
155 proc_set_source (create_case_source (&case_reader_source_class, pgm));
160 case_reader_pgm_free (pgm);
163 return CMD_CASCADING_FAILURE;
166 /* Frees a struct case_reader_pgm. */
168 case_reader_pgm_free (struct case_reader_pgm *pgm)
172 any_reader_close (pgm->reader);
173 destroy_case_map (pgm->map);
174 case_destroy (&pgm->bounce);
179 /* Clears internal state related to case reader input procedure. */
181 case_reader_source_destroy (struct case_source *source)
183 struct case_reader_pgm *pgm = source->aux;
184 case_reader_pgm_free (pgm);
187 /* Reads all the cases from the data file into C and passes them
188 to WRITE_CASE one by one, passing WC_DATA.
189 Returns true if successful, false if an I/O error occurred. */
191 case_reader_source_read (struct case_source *source,
193 write_case_func *write_case, write_case_data wc_data)
195 struct case_reader_pgm *pgm = source->aux;
201 if (pgm->map == NULL)
202 got_case = any_reader_read (pgm->reader, c);
205 got_case = any_reader_read (pgm->reader, &pgm->bounce);
207 map_case (pgm->map, &pgm->bounce, c);
212 ok = write_case (wc_data);
216 return ok && !any_reader_error (pgm->reader);
219 static const struct case_source_class case_reader_source_class =
223 case_reader_source_read,
224 case_reader_source_destroy,
231 return parse_read_command (GET_CMD);
238 return parse_read_command (IMPORT_CMD);
241 /* Writing system and portable files. */
243 /* Type of output file. */
246 SYSFILE_WRITER, /* System file. */
247 PORFILE_WRITER /* Portable file. */
250 /* Type of a command. */
253 XFORM_CMD, /* Transformation. */
254 PROC_CMD /* Procedure. */
257 /* File writer plus a case map. */
260 struct any_writer *writer; /* File writer. */
261 struct case_map *map; /* Map to output file dictionary
262 (null pointer for identity mapping). */
263 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
268 case_writer_destroy (struct case_writer *aw)
273 ok = any_writer_close (aw->writer);
274 destroy_case_map (aw->map);
275 case_destroy (&aw->bounce);
281 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
282 WRITER_TYPE identifies the type of file to write,
283 and COMMAND_TYPE identifies the type of command.
285 On success, returns a writer.
286 For procedures only, sets *RETAIN_UNSELECTED to true if cases
287 that would otherwise be excluded by FILTER or USE should be
290 On failure, returns a null pointer. */
291 static struct case_writer *
292 parse_write_command (enum writer_type writer_type,
293 enum command_type command_type,
294 bool *retain_unselected)
297 struct file_handle *handle; /* Output file. */
298 struct dictionary *dict; /* Dictionary for output file. */
299 struct case_writer *aw; /* Writer. */
301 /* Common options. */
302 bool print_map; /* Print map? TODO. */
303 bool print_short_names; /* Print long-to-short name map. TODO. */
304 struct sfm_write_options sysfile_opts;
305 struct pfm_write_options porfile_opts;
307 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
308 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
309 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
311 if (command_type == PROC_CMD)
312 *retain_unselected = true;
315 dict = dict_clone (default_dict);
316 aw = xmalloc (sizeof *aw);
319 case_nullify (&aw->bounce);
321 print_short_names = false;
322 sysfile_opts = sfm_writer_default_options ();
323 porfile_opts = pfm_writer_default_options ();
325 start_case_map (dict);
326 dict_delete_scratch_vars (dict);
331 if (lex_match_id ("OUTFILE"))
335 lex_sbc_only_once ("OUTFILE");
341 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
345 else if (lex_match_id ("NAMES"))
346 print_short_names = true;
347 else if (lex_match_id ("PERMISSIONS"))
352 if (lex_match_id ("READONLY"))
354 else if (lex_match_id ("WRITEABLE"))
358 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
361 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
363 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
366 if (lex_match_id ("RETAIN"))
367 *retain_unselected = true;
368 else if (lex_match_id ("DELETE"))
369 *retain_unselected = false;
372 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
376 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
377 sysfile_opts.compress = true;
378 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
379 sysfile_opts.compress = false;
380 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
383 if (!lex_force_int ())
385 sysfile_opts.version = lex_integer ();
388 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
391 if (lex_match_id ("COMMUNICATIONS"))
392 porfile_opts.type = PFM_COMM;
393 else if (lex_match_id ("TAPE"))
394 porfile_opts.type = PFM_TAPE;
397 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
401 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
404 if (!lex_force_int ())
406 porfile_opts.digits = lex_integer ();
409 else if (!parse_dict_trim (dict))
412 if (!lex_match ('/'))
415 if (lex_end_of_command () != CMD_SUCCESS)
420 lex_sbc_missing ("OUTFILE");
424 dict_compact_values (dict);
425 aw->map = finish_case_map (dict);
427 case_create (&aw->bounce, dict_get_next_value_idx (dict));
429 if (fh_get_referent (handle) == FH_REF_FILE)
434 aw->writer = any_writer_from_sfm_writer (
435 sfm_open_writer (handle, dict, sysfile_opts));
438 aw->writer = any_writer_from_pfm_writer (
439 pfm_open_writer (handle, dict, porfile_opts));
444 aw->writer = any_writer_open (handle, dict);
445 if (aw->writer == NULL)
452 case_writer_destroy (aw);
457 /* Writes case C to writer AW. */
459 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
463 map_case (aw->map, c, &aw->bounce);
466 return any_writer_write (aw->writer, c);
469 /* SAVE and EXPORT. */
471 static bool output_proc (const struct ccase *, void *);
473 /* Parses and performs the SAVE or EXPORT procedure. */
475 parse_output_proc (enum writer_type writer_type)
477 bool retain_unselected;
478 struct variable *saved_filter_variable;
479 struct case_writer *aw;
482 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
484 return CMD_CASCADING_FAILURE;
486 saved_filter_variable = dict_get_filter (default_dict);
487 if (retain_unselected)
488 dict_set_filter (default_dict, NULL);
489 ok = procedure (output_proc, aw);
490 dict_set_filter (default_dict, saved_filter_variable);
492 case_writer_destroy (aw);
493 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
496 /* Writes case C to file. */
498 output_proc (const struct ccase *c, void *aw_)
500 struct case_writer *aw = aw_;
501 return case_writer_write_case (aw, c);
507 return parse_output_proc (SYSFILE_WRITER);
513 return parse_output_proc (PORFILE_WRITER);
516 /* XSAVE and XEXPORT. */
518 /* Transformation. */
521 struct case_writer *aw; /* Writer. */
524 static trns_proc_func output_trns_proc;
525 static trns_free_func output_trns_free;
527 /* Parses the XSAVE or XEXPORT transformation command. */
529 parse_output_trns (enum writer_type writer_type)
531 struct output_trns *t = xmalloc (sizeof *t);
532 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
536 return CMD_CASCADING_FAILURE;
539 add_transformation (output_trns_proc, output_trns_free, t);
543 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
545 output_trns_proc (void *trns_, struct ccase *c, casenum_t case_num UNUSED)
547 struct output_trns *t = trns_;
548 case_writer_write_case (t->aw, c);
549 return TRNS_CONTINUE;
552 /* Frees an XSAVE or XEXPORT transformation.
553 Returns true if successful, false if an I/O error occurred. */
555 output_trns_free (void *trns_)
557 struct output_trns *t = trns_;
562 ok = case_writer_destroy (t->aw);
572 return parse_output_trns (SYSFILE_WRITER);
575 /* XEXPORT command. */
579 return parse_output_trns (PORFILE_WRITER);
582 static bool rename_variables (struct dictionary *dict);
583 static bool drop_variables (struct dictionary *dict);
584 static bool keep_variables (struct dictionary *dict);
586 /* Commands that read and write system files share a great deal
587 of common syntactic structure for rearranging and dropping
588 variables. This function parses this syntax and modifies DICT
589 appropriately. Returns true on success, false on failure. */
591 parse_dict_trim (struct dictionary *dict)
593 if (lex_match_id ("MAP"))
598 else if (lex_match_id ("DROP"))
599 return drop_variables (dict);
600 else if (lex_match_id ("KEEP"))
601 return keep_variables (dict);
602 else if (lex_match_id ("RENAME"))
603 return rename_variables (dict);
606 lex_error (_("expecting a valid subcommand"));
611 /* Parses and performs the RENAME subcommand of GET and SAVE. */
613 rename_variables (struct dictionary *dict)
631 v = parse_dict_variable (dict);
634 if (!lex_force_match ('=')
637 if (dict_lookup_var (dict, tokid) != NULL)
639 msg (SE, _("Cannot rename %s as %s because there already exists "
640 "a variable named %s. To rename variables with "
641 "overlapping names, use a single RENAME subcommand "
642 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
643 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
647 dict_rename_var (dict, v, tokid);
656 while (lex_match ('('))
660 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
662 if (!lex_match ('='))
664 msg (SE, _("`=' expected after variable list."));
667 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
671 msg (SE, _("Number of variables on left side of `=' (%d) does not "
672 "match number of variables on right side (%d), in "
673 "parenthesized group %d of RENAME subcommand."),
674 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
677 if (!lex_force_match (')'))
682 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
684 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
690 for (i = 0; i < nn; i++)
698 /* Parses and performs the DROP subcommand of GET and SAVE.
699 Returns true if successful, false on failure.*/
701 drop_variables (struct dictionary *dict)
707 if (!parse_variables (dict, &v, &nv, PV_NONE))
709 dict_delete_vars (dict, v, nv);
712 if (dict_get_var_cnt (dict) == 0)
714 msg (SE, _("Cannot DROP all variables from dictionary."));
720 /* Parses and performs the KEEP subcommand of GET and SAVE.
721 Returns true if successful, false on failure.*/
723 keep_variables (struct dictionary *dict)
730 if (!parse_variables (dict, &v, &nv, PV_NONE))
733 /* Move the specified variables to the beginning. */
734 dict_reorder_vars (dict, v, nv);
736 /* Delete the remaining variables. */
737 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
738 for (i = nv; i < dict_get_var_cnt (dict); i++)
739 v[i - nv] = dict_get_var (dict, i);
740 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
751 MTF_FILE, /* Specified on FILE= subcommand. */
752 MTF_TABLE /* Specified on TABLE= subcommand. */
755 /* One of the files on MATCH FILES. */
758 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
759 struct mtf_file *next_min; /* Next in the chain of minimums. */
761 int type; /* One of MTF_*. */
762 struct variable **by; /* List of BY variables for this file. */
763 struct file_handle *handle; /* File handle. */
764 struct any_reader *reader; /* File reader. */
765 struct dictionary *dict; /* Dictionary from system file. */
768 char *in_name; /* Variable name. */
769 struct variable *in_var; /* Variable (in master dictionary). */
771 struct ccase input; /* Input record. */
774 /* MATCH FILES procedure. */
777 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
778 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
780 bool ok; /* False if I/O error occurs. */
782 size_t by_cnt; /* Number of variables on BY subcommand. */
784 /* Names of FIRST, LAST variables. */
785 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
787 struct dictionary *dict; /* Dictionary of output file. */
788 struct casefile *output; /* MATCH FILES output. */
789 struct ccase mtf_case; /* Case used for output. */
791 unsigned seq_num; /* Have we initialized this variable? */
792 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
795 static bool mtf_free (struct mtf_proc *);
796 static bool mtf_close_file (struct mtf_file *);
797 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
798 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
800 static bool mtf_read_nonactive_records (void *);
801 static bool mtf_processing_finish (void *);
802 static bool mtf_processing (const struct ccase *, void *);
804 static char *var_type_description (struct variable *);
806 static void set_master (struct variable *, struct variable *master);
807 static struct variable *get_master (struct variable *);
809 /* Parse and execute the MATCH FILES command. */
811 cmd_match_files (void)
814 struct mtf_file *first_table = NULL;
815 struct mtf_file *iter;
817 bool used_active_file = false;
818 bool saw_table = false;
823 mtf.head = mtf.tail = NULL;
827 mtf.dict = dict_create ();
829 case_nullify (&mtf.mtf_case);
832 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
836 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
838 struct mtf_file *file = xmalloc (sizeof *file);
840 if (lex_match_id ("FILE"))
841 file->type = MTF_FILE;
842 else if (lex_match_id ("TABLE"))
844 file->type = MTF_TABLE;
855 file->in_name = NULL;
857 case_nullify (&file->input);
859 /* FILEs go first, then TABLEs. */
860 if (file->type == MTF_TABLE || first_table == NULL)
863 file->prev = mtf.tail;
865 mtf.tail->next = file;
867 if (mtf.head == NULL)
869 if (file->type == MTF_TABLE && first_table == NULL)
874 assert (file->type == MTF_FILE);
875 file->next = first_table;
876 file->prev = first_table->prev;
877 if (first_table->prev)
878 first_table->prev->next = file;
881 first_table->prev = file;
889 if (used_active_file)
891 msg (SE, _("The active file may not be specified more "
895 used_active_file = true;
897 if (!proc_has_source ())
899 msg (SE, _("Cannot specify the active file since no active "
900 "file has been defined."));
904 if (proc_make_temporary_transformations_permanent ())
906 _("MATCH FILES may not be used after TEMPORARY when "
907 "the active file is an input source. "
908 "Temporary transformations will be made permanent."));
910 file->dict = default_dict;
914 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
915 if (file->handle == NULL)
918 file->reader = any_reader_open (file->handle, &file->dict);
919 if (file->reader == NULL)
922 case_create (&file->input, dict_get_next_value_idx (file->dict));
925 while (lex_match ('/'))
926 if (lex_match_id ("RENAME"))
928 if (!rename_variables (file->dict))
931 else if (lex_match_id ("IN"))
940 if (file->in_name != NULL)
942 msg (SE, _("Multiple IN subcommands for a single FILE or "
946 file->in_name = xstrdup (tokid);
951 mtf_merge_dictionary (mtf.dict, file);
956 if (lex_match (T_BY))
958 struct variable **by;
962 msg (SE, _("BY may appear at most once."));
967 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
968 PV_NO_DUPLICATE | PV_NO_SCRATCH))
971 for (iter = mtf.head; iter != NULL; iter = iter->next)
975 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
977 for (i = 0; i < mtf.by_cnt; i++)
979 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
980 if (iter->by[i] == NULL)
982 msg (SE, _("File %s lacks BY variable %s."),
983 iter->handle ? fh_get_name (iter->handle) : "*",
992 else if (lex_match_id ("FIRST"))
994 if (mtf.first[0] != '\0')
996 msg (SE, _("FIRST may appear at most once."));
1001 if (!lex_force_id ())
1003 strcpy (mtf.first, tokid);
1006 else if (lex_match_id ("LAST"))
1008 if (mtf.last[0] != '\0')
1010 msg (SE, _("LAST may appear at most once."));
1015 if (!lex_force_id ())
1017 strcpy (mtf.last, tokid);
1020 else if (lex_match_id ("MAP"))
1024 else if (lex_match_id ("DROP"))
1026 if (!drop_variables (mtf.dict))
1029 else if (lex_match_id ("KEEP"))
1031 if (!keep_variables (mtf.dict))
1040 if (!lex_match ('/') && token != '.')
1042 lex_end_of_command ();
1047 if (mtf.by_cnt == 0)
1051 msg (SE, _("BY is required when TABLE is specified."));
1056 msg (SE, _("BY is required when IN is specified."));
1061 /* Set up mapping from each file's variables to master
1063 for (iter = mtf.head; iter != NULL; iter = iter->next)
1065 struct dictionary *d = iter->dict;
1068 for (i = 0; i < dict_get_var_cnt (d); i++)
1070 struct variable *v = dict_get_var (d, i);
1071 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1077 /* Add IN variables to master dictionary. */
1078 for (iter = mtf.head; iter != NULL; iter = iter->next)
1079 if (iter->in_name != NULL)
1081 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1082 if (iter->in_var == NULL)
1084 msg (SE, _("IN variable name %s duplicates an "
1085 "existing variable name."),
1086 iter->in_var->name);
1089 iter->in_var->print = iter->in_var->write
1090 = make_output_format (FMT_F, 1, 0);
1093 /* MATCH FILES performs an n-way merge on all its input files.
1096 1. Read one input record from every input FILE.
1098 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1100 3. Find the FILE input record(s) that have minimum BY
1101 values. Store all the values from these input records into
1104 4. For every TABLE, read another record as long as the BY values
1105 on the TABLE's input record are less than the FILEs' BY values.
1106 If an exact match is found, store all the values from the TABLE
1107 input record into the output record.
1109 5. Write the output record.
1111 6. Read another record from each input file FILE and TABLE that
1112 we stored values from above. If we come to the end of one of the
1113 input files, remove it from the list of input files.
1115 7. Repeat from step 2.
1117 Unfortunately, this algorithm can't be implemented in a
1118 straightforward way because there's no function to read a
1119 record from the active file. Instead, it has to be written
1122 FIXME: For merging large numbers of files (more than 10?) a
1123 better algorithm would use a heap for finding minimum
1126 if (!used_active_file)
1127 discard_variables ();
1129 dict_compact_values (mtf.dict);
1130 mtf.output = fastfile_create (dict_get_next_value_idx (mtf.dict));
1131 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1132 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1134 if (!mtf_read_nonactive_records (&mtf))
1137 if (used_active_file)
1139 proc_set_sink (create_case_sink (&null_sink_class, default_dict, NULL));
1140 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1143 ok = mtf_processing_finish (&mtf);
1145 discard_variables ();
1147 dict_destroy (default_dict);
1148 default_dict = mtf.dict;
1150 proc_set_source (storage_source_create (mtf.output));
1153 if (!mtf_free (&mtf))
1155 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1159 return CMD_CASCADING_FAILURE;
1162 /* Repeats 2...7 an arbitrary number of times. */
1164 mtf_processing_finish (void *mtf_)
1166 struct mtf_proc *mtf = mtf_;
1167 struct mtf_file *iter;
1169 /* Find the active file and delete it. */
1170 for (iter = mtf->head; iter; iter = iter->next)
1171 if (iter->handle == NULL)
1173 if (!mtf_delete_file_in_place (mtf, &iter))
1178 while (mtf->head && mtf->head->type == MTF_FILE)
1179 if (!mtf_processing (NULL, mtf))
1185 /* Return a string in a static buffer describing V's variable type and
1188 var_type_description (struct variable *v)
1190 static char buf[2][32];
1197 if (v->type == NUMERIC)
1198 strcpy (s, "numeric");
1201 assert (v->type == ALPHA);
1202 sprintf (s, "string with width %d", v->width);
1207 /* Closes FILE and frees its associated data.
1208 Returns true if successful, false if an I/O error
1209 occurred on FILE. */
1211 mtf_close_file (struct mtf_file *file)
1213 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1215 any_reader_close (file->reader);
1216 if (file->handle != NULL)
1217 dict_destroy (file->dict);
1218 case_destroy (&file->input);
1219 free (file->in_name);
1224 /* Free all the data for the MATCH FILES procedure.
1225 Returns true if successful, false if an I/O error
1228 mtf_free (struct mtf_proc *mtf)
1230 struct mtf_file *iter, *next;
1233 for (iter = mtf->head; iter; iter = next)
1236 assert (iter->dict != mtf->dict);
1237 if (!mtf_close_file (iter))
1242 dict_destroy (mtf->dict);
1243 case_destroy (&mtf->mtf_case);
1244 free (mtf->seq_nums);
1249 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1250 file in the chain, or to NULL if was the last in the chain.
1251 Returns true if successful, false if an I/O error occurred. */
1253 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1255 struct mtf_file *f = *file;
1259 f->prev->next = f->next;
1261 f->next->prev = f->prev;
1263 mtf->head = f->next;
1265 mtf->tail = f->prev;
1268 if (f->in_var != NULL)
1269 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1270 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1272 struct variable *v = dict_get_var (f->dict, i);
1273 struct variable *mv = get_master (v);
1276 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1278 if (v->type == NUMERIC)
1281 memset (out->s, ' ', v->width);
1285 return mtf_close_file (f);
1288 /* Read a record from every input file except the active file.
1289 Returns true if successful, false if an I/O error occurred. */
1291 mtf_read_nonactive_records (void *mtf_)
1293 struct mtf_proc *mtf = mtf_;
1294 struct mtf_file *iter, *next;
1297 for (iter = mtf->head; ok && iter != NULL; iter = next)
1300 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1301 if (!mtf_delete_file_in_place (mtf, &iter))
1307 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1308 if A == B, 1 if A > B. */
1310 mtf_compare_BY_values (struct mtf_proc *mtf,
1311 struct mtf_file *a, struct mtf_file *b,
1312 const struct ccase *c)
1314 const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1315 const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1316 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1317 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1320 /* Perform one iteration of steps 3...7 above.
1321 Returns true if successful, false if an I/O error occurred. */
1323 mtf_processing (const struct ccase *c, void *mtf_)
1325 struct mtf_proc *mtf = mtf_;
1327 /* Do we need another record from the active file? */
1328 bool read_active_file;
1330 assert (mtf->head != NULL);
1331 if (mtf->head->type == MTF_TABLE)
1336 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1337 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1338 struct mtf_file *iter, *next;
1340 read_active_file = false;
1342 /* 3. Find the FILE input record(s) that have minimum BY
1343 values. Store all the values from these input records into
1344 the output record. */
1345 min_head = min_tail = mtf->head;
1346 max_head = max_tail = NULL;
1347 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1350 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1354 max_tail = max_tail->next_min = iter;
1356 max_head = max_tail = iter;
1359 min_tail = min_tail->next_min = iter;
1364 max_tail->next_min = min_head;
1365 max_tail = min_tail;
1369 max_head = min_head;
1370 max_tail = min_tail;
1372 min_head = min_tail = iter;
1376 /* 4. For every TABLE, read another record as long as the BY
1377 values on the TABLE's input record are less than the FILEs'
1378 BY values. If an exact match is found, store all the values
1379 from the TABLE input record into the output record. */
1380 for (; iter != NULL; iter = next)
1382 assert (iter->type == MTF_TABLE);
1387 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1391 max_tail = max_tail->next_min = iter;
1393 max_head = max_tail = iter;
1396 min_tail = min_tail->next_min = iter;
1399 if (iter->handle == NULL)
1401 if (any_reader_read (iter->reader, &iter->input))
1403 if (!mtf_delete_file_in_place (mtf, &iter))
1410 /* Next sequence number. */
1413 /* Store data to all the records we are using. */
1415 min_tail->next_min = NULL;
1416 for (iter = min_head; iter; iter = iter->next_min)
1420 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1422 struct variable *v = dict_get_var (iter->dict, i);
1423 struct variable *mv = get_master (v);
1425 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1427 const struct ccase *record
1428 = case_is_null (&iter->input) ? c : &iter->input;
1429 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1431 mtf->seq_nums[mv->index] = mtf->seq_num;
1432 if (v->type == NUMERIC)
1433 out->f = case_num (record, v->fv);
1435 memcpy (out->s, case_str (record, v->fv), v->width);
1438 if (iter->in_var != NULL)
1439 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1441 if (iter->type == MTF_FILE && iter->handle == NULL)
1442 read_active_file = true;
1445 /* Store missing values to all the records we're not
1448 max_tail->next_min = NULL;
1449 for (iter = max_head; iter; iter = iter->next_min)
1453 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1455 struct variable *v = dict_get_var (iter->dict, i);
1456 struct variable *mv = get_master (v);
1458 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1460 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1461 mtf->seq_nums[mv->index] = mtf->seq_num;
1463 if (v->type == NUMERIC)
1466 memset (out->s, ' ', v->width);
1469 if (iter->in_var != NULL)
1470 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1473 /* 5. Write the output record. */
1474 casefile_append (mtf->output, &mtf->mtf_case);
1476 /* 6. Read another record from each input file FILE and TABLE
1477 that we stored values from above. If we come to the end of
1478 one of the input files, remove it from the list of input
1480 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1482 next = iter->next_min;
1483 if (iter->reader != NULL
1484 && !any_reader_read (iter->reader, &iter->input))
1485 if (!mtf_delete_file_in_place (mtf, &iter))
1489 while (!read_active_file
1490 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1495 /* Merge the dictionary for file F into master dictionary M. */
1497 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1499 struct dictionary *d = f->dict;
1500 const char *d_docs, *m_docs;
1503 if (dict_get_label (m) == NULL)
1504 dict_set_label (m, dict_get_label (d));
1506 d_docs = dict_get_documents (d);
1507 m_docs = dict_get_documents (m);
1511 dict_set_documents (m, d_docs);
1517 new_len = strlen (m_docs) + strlen (d_docs);
1518 new_docs = xmalloc (new_len + 1);
1519 strcpy (new_docs, m_docs);
1520 strcat (new_docs, d_docs);
1521 dict_set_documents (m, new_docs);
1526 for (i = 0; i < dict_get_var_cnt (d); i++)
1528 struct variable *dv = dict_get_var (d, i);
1529 struct variable *mv = dict_lookup_var (m, dv->name);
1531 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1536 if (mv->width != dv->width)
1538 msg (SE, _("Variable %s in file %s (%s) has different "
1539 "type or width from the same variable in "
1540 "earlier file (%s)."),
1541 dv->name, fh_get_name (f->handle),
1542 var_type_description (dv), var_type_description (mv));
1546 if (dv->width == mv->width)
1548 if (val_labs_count (dv->val_labs)
1549 && !val_labs_count (mv->val_labs))
1551 val_labs_destroy (mv->val_labs);
1552 mv->val_labs = val_labs_copy (dv->val_labs);
1554 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1555 mv_copy (&mv->miss, &dv->miss);
1558 if (dv->label && !mv->label)
1559 mv->label = xstrdup (dv->label);
1562 mv = dict_clone_var_assert (m, dv, dv->name);
1568 /* Marks V's master variable as MASTER. */
1570 set_master (struct variable *v, struct variable *master)
1572 var_attach_aux (v, master, NULL);
1575 /* Returns the master variable corresponding to V,
1576 as set with set_master(). */
1577 static struct variable *
1578 get_master (struct variable *v)
1587 A case map copies data from a case that corresponds for one
1588 dictionary to a case that corresponds to a second dictionary
1589 derived from the first by, optionally, deleting, reordering,
1590 or renaming variables. (No new variables may be created.)
1596 size_t value_cnt; /* Number of values in map. */
1597 int *map; /* For each destination index, the
1598 corresponding source index. */
1601 /* Prepares dictionary D for producing a case map. Afterward,
1602 the caller may delete, reorder, or rename variables within D
1603 at will before using finish_case_map() to produce the case
1606 Uses D's aux members, which must otherwise not be in use. */
1608 start_case_map (struct dictionary *d)
1610 size_t var_cnt = dict_get_var_cnt (d);
1613 for (i = 0; i < var_cnt; i++)
1615 struct variable *v = dict_get_var (d, i);
1616 int *src_fv = xmalloc (sizeof *src_fv);
1618 var_attach_aux (v, src_fv, var_dtor_free);
1622 /* Produces a case map from dictionary D, which must have been
1623 previously prepared with start_case_map().
1625 Does not retain any reference to D, and clears the aux members
1626 set up by start_case_map().
1628 Returns the new case map, or a null pointer if no mapping is
1629 required (that is, no data has changed position). */
1630 static struct case_map *
1631 finish_case_map (struct dictionary *d)
1633 struct case_map *map;
1634 size_t var_cnt = dict_get_var_cnt (d);
1638 map = xmalloc (sizeof *map);
1639 map->value_cnt = dict_get_next_value_idx (d);
1640 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1641 for (i = 0; i < map->value_cnt; i++)
1645 for (i = 0; i < var_cnt; i++)
1647 struct variable *v = dict_get_var (d, i);
1648 int *src_fv = (int *) var_detach_aux (v);
1651 if (v->fv != *src_fv)
1654 for (idx = 0; idx < v->nv; idx++)
1656 int src_idx = *src_fv + idx;
1657 int dst_idx = v->fv + idx;
1659 assert (map->map[dst_idx] == -1);
1660 map->map[dst_idx] = src_idx;
1667 destroy_case_map (map);
1671 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1677 /* Maps from SRC to DST, applying case map MAP. */
1679 map_case (const struct case_map *map,
1680 const struct ccase *src, struct ccase *dst)
1684 assert (map != NULL);
1685 assert (src != NULL);
1686 assert (dst != NULL);
1687 assert (src != dst);
1689 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1691 int src_idx = map->map[dst_idx];
1693 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1697 /* Destroys case map MAP. */
1699 destroy_case_map (struct case_map *map)