1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/fastfile.h>
31 #include <data/dictionary.h>
32 #include <data/por-file-writer.h>
33 #include <data/procedure.h>
34 #include <data/settings.h>
35 #include <data/storage-stream.h>
36 #include <data/sys-file-writer.h>
37 #include <data/transformations.h>
38 #include <data/value-labels.h>
39 #include <data/variable.h>
40 #include <language/command.h>
41 #include <language/data-io/file-handle.h>
42 #include <language/lexer/lexer.h>
43 #include <language/lexer/variable-parser.h>
44 #include <libpspp/alloc.h>
45 #include <libpspp/assertion.h>
46 #include <libpspp/compiler.h>
47 #include <libpspp/hash.h>
48 #include <libpspp/message.h>
49 #include <libpspp/message.h>
50 #include <libpspp/misc.h>
51 #include <libpspp/str.h>
54 #define _(msgid) gettext (msgid)
56 /* Rearranging and reducing a dictionary. */
57 static void start_case_map (struct dictionary *);
58 static struct case_map *finish_case_map (struct dictionary *);
59 static void map_case (const struct case_map *,
60 const struct ccase *, struct ccase *);
61 static void destroy_case_map (struct case_map *);
63 static bool parse_dict_trim (struct dictionary *);
65 /* Reading system and portable files. */
67 /* Type of command. */
74 /* Case reader input program.
   Holds the state attached (as the aux pointer) to a case source
   that reads cases from a file through an any_reader. */
75 struct case_reader_pgm
77 struct any_reader *reader; /* File reader. */
78 struct case_map *map; /* Map from file dict to active file dict.
   A null pointer means cases are read straight into the
   destination case with no mapping step. */
79 struct ccase bounce; /* Bounce buffer.  When MAP is non-null, each
   case is read into this buffer and then mapped into the
   destination case. */
82 static const struct case_source_class case_reader_source_class;
84 static void case_reader_pgm_free (struct case_reader_pgm *);
86 /* Parses a GET or IMPORT command.
   TYPE says which of the two commands is being parsed; the TYPE
   subcommand (COMM or TAPE) is recognized only for IMPORT_CMD.
   The failure path visible at the end frees PGM and returns
   CMD_CASCADING_FAILURE.
   NOTE(review): short lines of this function (braces, else,
   goto/return statements) are not visible here, so the exact
   branching between the statements below should be confirmed. */
88 parse_read_command (enum reader_command type)
90 struct case_reader_pgm *pgm = NULL;
91 struct file_handle *fh = NULL;
92 struct dictionary *dict = NULL;
/* FILE subcommand; a bare string token is accepted in its place. */
98 if (lex_match_id ("FILE") || token == T_STRING)
/* Both ordinary files and scratch handles are accepted. */
102 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
106 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
110 if (lex_match_id ("COMM"))
112 else if (lex_match_id ("TAPE"))
116 lex_error (_("expecting COMM or TAPE"));
/* Presumably reached when no file was specified -- confirm. */
126 lex_sbc_missing ("FILE");
130 discard_variables (current_dataset);
/* Allocate the reader state and open the file; any_reader_open()
   hands back the file's dictionary through DICT. */
132 pgm = xmalloc (sizeof *pgm);
133 pgm->reader = any_reader_open (fh, &dict);
/* Nullify the bounce case before testing for an open failure. */
135 case_nullify (&pgm->bounce);
136 if (pgm->reader == NULL)
/* The bounce buffer is sized from the file dictionary before any
   trimming is applied to it. */
139 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
/* Dictionary trimming happens between start_case_map() and
   finish_case_map(), so the resulting map describes the changes. */
141 start_case_map (dict);
146 if (!parse_dict_trim (dict))
150 pgm->map = finish_case_map (dict);
/* Replace the active dictionary with the (trimmed) file dictionary. */
152 dict_destroy (dataset_dict (current_dataset));
153 dataset_set_dict (current_dataset, dict);
/* Install a case source that reads from the file via PGM. */
155 proc_set_source (current_dataset,
156 create_case_source (&case_reader_source_class, pgm));
/* Failure path: release the reader state. */
161 case_reader_pgm_free (pgm);
164 return CMD_CASCADING_FAILURE;
167 /* Frees a struct case_reader_pgm.
   Closes PGM's file reader, destroys its case map, and destroys its
   bounce buffer case. */
169 case_reader_pgm_free (struct case_reader_pgm *pgm)
173 any_reader_close (pgm->reader);
174 destroy_case_map (pgm->map);
175 case_destroy (&pgm->bounce);
180 /* Clears internal state related to case reader input procedure.
   This is the destroy member of case_reader_source_class: it frees
   the struct case_reader_pgm stored in SOURCE's aux pointer. */
182 case_reader_source_destroy (struct case_source *source)
184 struct case_reader_pgm *pgm = source->aux;
185 case_reader_pgm_free (pgm);
188 /* Reads all the cases from the data file into C and passes them
189 to WRITE_CASE one by one, passing WC_DATA.
190 Returns true if successful, false if an I/O error occurred. */
192 case_reader_source_read (struct case_source *source,
194 write_case_func *write_case, write_case_data wc_data)
196 struct case_reader_pgm *pgm = source->aux;
/* No case map: read directly into the destination case. */
202 if (pgm->map == NULL)
203 got_case = any_reader_read (pgm->reader, c);
/* Otherwise read into the bounce buffer, then map it into C. */
206 got_case = any_reader_read (pgm->reader, &pgm->bounce);
208 map_case (pgm->map, &pgm->bounce, c);
213 ok = write_case (wc_data);
/* Success requires both that WRITE_CASE succeeded and that the
   reader did not record an error. */
217 return ok && !any_reader_error (pgm->reader);
220 static const struct case_source_class case_reader_source_class =
224 case_reader_source_read,
225 case_reader_source_destroy,
232 return parse_read_command (GET_CMD);
239 return parse_read_command (IMPORT_CMD);
242 /* Writing system and portable files. */
244 /* Type of output file. */
247 SYSFILE_WRITER, /* System file. */
248 PORFILE_WRITER /* Portable file. */
251 /* Type of a command. */
254 XFORM_CMD, /* Transformation. */
255 PROC_CMD /* Procedure. */
258 /* File writer plus a case map. */
261 struct any_writer *writer; /* File writer. */
262 struct case_map *map; /* Map to output file dictionary
263 (null pointer for identity mapping). */
264 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
/* Destroys case writer AW: closes its file writer, destroys its case
   map, and destroys its bounce buffer case.  OK receives the result
   of closing the file writer; callers such as output_trns_free() use
   this function's return value as an I/O success indicator. */
269 case_writer_destroy (struct case_writer *aw)
274 ok = any_writer_close (aw->writer);
275 destroy_case_map (aw->map);
276 case_destroy (&aw->bounce);
282 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
283 WRITER_TYPE identifies the type of file to write,
284 and COMMAND_TYPE identifies the type of command.
286 On success, returns a writer.
287 For procedures only, sets *RETAIN_UNSELECTED to true if cases
288 that would otherwise be excluded by FILTER or USE should be
291 On failure, returns a null pointer. */
292 static struct case_writer *
293 parse_write_command (enum writer_type writer_type,
294 enum command_type command_type,
295 bool *retain_unselected)
298 struct file_handle *handle; /* Output file. */
299 struct dictionary *dict; /* Dictionary for output file. */
300 struct case_writer *aw; /* Writer. */
302 /* Common options. */
303 bool print_map; /* Print map? TODO. */
304 bool print_short_names; /* Print long-to-short name map. TODO. */
305 struct sfm_write_options sysfile_opts;
306 struct pfm_write_options porfile_opts;
308 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
309 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
310 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
312 if (command_type == PROC_CMD)
313 *retain_unselected = true;
316 dict = dict_clone (dataset_dict (current_dataset));
317 aw = xmalloc (sizeof *aw);
320 case_nullify (&aw->bounce);
322 print_short_names = false;
323 sysfile_opts = sfm_writer_default_options ();
324 porfile_opts = pfm_writer_default_options ();
326 start_case_map (dict);
327 dict_delete_scratch_vars (dict);
332 if (lex_match_id ("OUTFILE"))
336 lex_sbc_only_once ("OUTFILE");
342 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
346 else if (lex_match_id ("NAMES"))
347 print_short_names = true;
348 else if (lex_match_id ("PERMISSIONS"))
353 if (lex_match_id ("READONLY"))
355 else if (lex_match_id ("WRITEABLE"))
359 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
362 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
364 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
367 if (lex_match_id ("RETAIN"))
368 *retain_unselected = true;
369 else if (lex_match_id ("DELETE"))
370 *retain_unselected = false;
373 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
377 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
378 sysfile_opts.compress = true;
379 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
380 sysfile_opts.compress = false;
381 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
384 if (!lex_force_int ())
386 sysfile_opts.version = lex_integer ();
389 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
392 if (lex_match_id ("COMMUNICATIONS"))
393 porfile_opts.type = PFM_COMM;
394 else if (lex_match_id ("TAPE"))
395 porfile_opts.type = PFM_TAPE;
398 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
402 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
405 if (!lex_force_int ())
407 porfile_opts.digits = lex_integer ();
410 else if (!parse_dict_trim (dict))
413 if (!lex_match ('/'))
416 if (lex_end_of_command () != CMD_SUCCESS)
421 lex_sbc_missing ("OUTFILE");
425 dict_compact_values (dict);
426 aw->map = finish_case_map (dict);
428 case_create (&aw->bounce, dict_get_next_value_idx (dict));
430 if (fh_get_referent (handle) == FH_REF_FILE)
435 aw->writer = any_writer_from_sfm_writer (
436 sfm_open_writer (handle, dict, sysfile_opts));
439 aw->writer = any_writer_from_pfm_writer (
440 pfm_open_writer (handle, dict, porfile_opts));
445 aw->writer = any_writer_open (handle, dict);
446 if (aw->writer == NULL)
453 case_writer_destroy (aw);
458 /* Writes case C to writer AW.
   Returns the result of any_writer_write(). */
460 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
/* Apply AW's case map, placing the mapped values in the bounce
   buffer.  NOTE(review): the condition guarding this call and the
   statement that redirects C to the bounce buffer are presumably on
   lines not visible here -- confirm. */
464 map_case (aw->map, c, &aw->bounce);
467 return any_writer_write (aw->writer, c);
470 /* SAVE and EXPORT. */
472 static bool output_proc (const struct ccase *, void *);
474 /* Parses and performs the SAVE or EXPORT procedure.
   WRITER_TYPE selects the kind of output file and is passed through
   to parse_write_command().
   Returns CMD_SUCCESS only if the procedure ran successfully and the
   output writer was closed without error; otherwise returns
   CMD_CASCADING_FAILURE. */
476 parse_output_proc (enum writer_type writer_type)
478 bool retain_unselected;
479 struct variable *saved_filter_variable;
480 struct case_writer *aw;
483 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
485 return CMD_CASCADING_FAILURE;
/* If unselected cases are to be retained, clear the dictionary's
   filter variable for the duration of the procedure, then restore
   the saved one afterward. */
487 saved_filter_variable = dict_get_filter (dataset_dict (current_dataset));
488 if (retain_unselected)
489 dict_set_filter (dataset_dict (current_dataset), NULL);
490 ok = procedure (current_dataset,output_proc, aw);
491 dict_set_filter (dataset_dict (current_dataset), saved_filter_variable);
/* case_writer_destroy() reports whether closing the output file
   succeeded.  Fold that into OK so that a failed close is not
   reported as success.  The call is the left operand of &&, so the
   writer is destroyed regardless of the value of OK. */
493 ok = case_writer_destroy (aw) && ok;
494 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
497 /* Writes case C to file.
   Per-case callback handed to procedure() by parse_output_proc();
   AW_ is the struct case_writer to write to.  Returns the result of
   case_writer_write_case(). */
499 output_proc (const struct ccase *c, void *aw_)
501 struct case_writer *aw = aw_;
502 return case_writer_write_case (aw, c);
508 return parse_output_proc (SYSFILE_WRITER);
514 return parse_output_proc (PORFILE_WRITER);
517 /* XSAVE and XEXPORT. */
519 /* Transformation. */
522 struct case_writer *aw; /* Writer. */
525 static trns_proc_func output_trns_proc;
526 static trns_free_func output_trns_free;
528 /* Parses the XSAVE or XEXPORT transformation command.
   Allocates a struct output_trns, parses the command into a case
   writer, and registers a transformation that writes each case. */
530 parse_output_trns (enum writer_type writer_type)
532 struct output_trns *t = xmalloc (sizeof *t);
/* XFORM_CMD with a null RETAIN_UNSELECTED argument. */
533 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
/* Presumably taken when parse_write_command() returned a null
   writer -- the guarding test is not visible here; confirm. */
537 return CMD_CASCADING_FAILURE;
/* T is passed as the transformation's aux data; output_trns_free
   is registered as its free function. */
540 add_transformation (current_dataset, output_trns_proc, output_trns_free, t);
544 /* Writes case C to the system file specified on XSAVE or XEXPORT.
   TRNS_ is the struct output_trns registered by parse_output_trns().
   The result of case_writer_write_case() is not examined here; this
   function always returns TRNS_CONTINUE. */
546 output_trns_proc (void *trns_, struct ccase *c, casenum_t case_num UNUSED)
548 struct output_trns *t = trns_;
549 case_writer_write_case (t->aw, c);
550 return TRNS_CONTINUE;
553 /* Frees an XSAVE or XEXPORT transformation.
554 Returns true if successful, false if an I/O error occurred. */
556 output_trns_free (void *trns_)
558 struct output_trns *t = trns_;
/* Destroying the case writer closes the output file; its result is
   kept in OK. */
563 ok = case_writer_destroy (t->aw);
573 return parse_output_trns (SYSFILE_WRITER);
576 /* XEXPORT command. */
580 return parse_output_trns (PORFILE_WRITER);
583 static bool rename_variables (struct dictionary *dict);
584 static bool drop_variables (struct dictionary *dict);
585 static bool keep_variables (struct dictionary *dict);
587 /* Commands that read and write system files share a great deal
588 of common syntactic structure for rearranging and dropping
589 variables. This function parses this syntax and modifies DICT
590 appropriately. Returns true on success, false on failure. */
592 parse_dict_trim (struct dictionary *dict)
/* MAP is recognized; its handling is on lines not visible here. */
594 if (lex_match_id ("MAP"))
/* DROP, KEEP, and RENAME each delegate to a helper and return its
   result directly. */
599 else if (lex_match_id ("DROP"))
600 return drop_variables (dict);
601 else if (lex_match_id ("KEEP"))
602 return keep_variables (dict);
603 else if (lex_match_id ("RENAME"))
604 return rename_variables (dict);
/* Anything else is reported as a syntax error. */
607 lex_error (_("expecting a valid subcommand"));
612 /* Parses and performs the RENAME subcommand of GET and SAVE.
   Two syntactic forms are visible below: a single OLD=NEW rename,
   and one or more parenthesized groups of the form
   (OLD... = NEW...), which are collected and applied together with
   dict_rename_vars().
   NOTE(review): the test that chooses between the two forms and the
   return statements are on lines not visible here -- confirm. */
614 rename_variables (struct dictionary *dict)
/* Single-variable form: parse the existing variable, require `=',
   and refuse the rename if the new name is already in DICT. */
632 v = parse_dict_variable (dict);
635 if (!lex_force_match ('=')
638 if (dict_lookup_var (dict, tokid) != NULL)
640 msg (SE, _("Cannot rename %s as %s because there already exists "
641 "a variable named %s. To rename variables with "
642 "overlapping names, use a single RENAME subcommand "
643 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
644 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
648 dict_rename_var (dict, v, tokid);
/* Parenthesized-group form: each group appends its old variables to
   V (PV_APPEND) and its new names to NEW_NAMES (PV_APPEND). */
657 while (lex_match ('('))
661 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
663 if (!lex_match ('='))
665 msg (SE, _("`=' expected after variable list."));
668 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* The counts reported are per group: the running totals minus the
   total recorded in OLD_NV. */
672 msg (SE, _("Number of variables on left side of `=' (%d) does not "
673 "match number of variables on right side (%d), in "
674 "parenthesized group %d of RENAME subcommand."),
675 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
678 if (!lex_force_match (')'))
/* Apply all collected renames at once; on failure ERR_NAME names
   the offending variable. */
683 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
685 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
/* Loop over the NN collected new names (presumably to free them;
   the loop body is not visible here -- confirm). */
691 for (i = 0; i < nn; i++)
699 /* Parses and performs the DROP subcommand of GET and SAVE.
700 Returns true if successful, false on failure.*/
702 drop_variables (struct dictionary *dict)
/* Parse the list of variables to drop, then delete them from DICT. */
708 if (!parse_variables (dict, &v, &nv, PV_NONE))
710 dict_delete_vars (dict, v, nv);
/* Dropping every variable is reported as an error. */
713 if (dict_get_var_cnt (dict) == 0)
715 msg (SE, _("Cannot DROP all variables from dictionary."));
721 /* Parses and performs the KEEP subcommand of GET and SAVE.
722 Returns true if successful, false on failure.*/
724 keep_variables (struct dictionary *dict)
/* Parse the list of variables to keep into V (NV entries). */
731 if (!parse_variables (dict, &v, &nv, PV_NONE))
734 /* Move the specified variables to the beginning. */
735 dict_reorder_vars (dict, v, nv);
737 /* Delete the remaining variables. */
/* V is reused as scratch space: it is resized to hold every variable
   from index NV onward, filled with those variables, and passed to
   dict_delete_vars(). */
738 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
739 for (i = nv; i < dict_get_var_cnt (dict); i++)
740 v[i - nv] = dict_get_var (dict, i);
741 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
752 MTF_FILE, /* Specified on FILE= subcommand. */
753 MTF_TABLE /* Specified on TABLE= subcommand. */
756 /* One of the files on MATCH FILES. */
759 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
760 struct mtf_file *next_min; /* Next in the chain of minimums. */
762 int type; /* One of MTF_*. */
763 struct variable **by; /* List of BY variables for this file. */
764 struct file_handle *handle; /* File handle. */
765 struct any_reader *reader; /* File reader. */
766 struct dictionary *dict; /* Dictionary from system file. */
769 char *in_name; /* Variable name. */
770 struct variable *in_var; /* Variable (in master dictionary). */
772 struct ccase input; /* Input record. */
775 /* MATCH FILES procedure. */
778 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
779 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
781 bool ok; /* False if I/O error occurs. */
783 size_t by_cnt; /* Number of variables on BY subcommand. */
785 /* Names of FIRST, LAST variables. */
786 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
788 struct dictionary *dict; /* Dictionary of output file. */
789 struct casefile *output; /* MATCH FILES output. */
790 struct ccase mtf_case; /* Case used for output. */
792 unsigned seq_num; /* Have we initialized this variable? */
793 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
796 static bool mtf_free (struct mtf_proc *);
797 static bool mtf_close_file (struct mtf_file *);
798 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
799 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
801 static bool mtf_read_nonactive_records (void *);
802 static bool mtf_processing_finish (void *);
803 static bool mtf_processing (const struct ccase *, void *);
805 static char *var_type_description (struct variable *);
807 static void set_master (struct variable *, struct variable *master);
808 static struct variable *get_master (struct variable *);
810 /* Parse and execute the MATCH FILES command. */
812 cmd_match_files (void)
815 struct mtf_file *first_table = NULL;
816 struct mtf_file *iter;
818 bool used_active_file = false;
819 bool saw_table = false;
824 mtf.head = mtf.tail = NULL;
828 mtf.dict = dict_create ();
830 case_nullify (&mtf.mtf_case);
833 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (current_dataset)));
837 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
839 struct mtf_file *file = xmalloc (sizeof *file);
841 if (lex_match_id ("FILE"))
842 file->type = MTF_FILE;
843 else if (lex_match_id ("TABLE"))
845 file->type = MTF_TABLE;
856 file->in_name = NULL;
858 case_nullify (&file->input);
860 /* FILEs go first, then TABLEs. */
861 if (file->type == MTF_TABLE || first_table == NULL)
864 file->prev = mtf.tail;
866 mtf.tail->next = file;
868 if (mtf.head == NULL)
870 if (file->type == MTF_TABLE && first_table == NULL)
875 assert (file->type == MTF_FILE);
876 file->next = first_table;
877 file->prev = first_table->prev;
878 if (first_table->prev)
879 first_table->prev->next = file;
882 first_table->prev = file;
890 if (used_active_file)
892 msg (SE, _("The active file may not be specified more "
896 used_active_file = true;
898 if (!proc_has_source (current_dataset))
900 msg (SE, _("Cannot specify the active file since no active "
901 "file has been defined."));
905 if (proc_make_temporary_transformations_permanent (current_dataset))
907 _("MATCH FILES may not be used after TEMPORARY when "
908 "the active file is an input source. "
909 "Temporary transformations will be made permanent."));
911 file->dict = dataset_dict (current_dataset);
915 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
916 if (file->handle == NULL)
919 file->reader = any_reader_open (file->handle, &file->dict);
920 if (file->reader == NULL)
923 case_create (&file->input, dict_get_next_value_idx (file->dict));
926 while (lex_match ('/'))
927 if (lex_match_id ("RENAME"))
929 if (!rename_variables (file->dict))
932 else if (lex_match_id ("IN"))
941 if (file->in_name != NULL)
943 msg (SE, _("Multiple IN subcommands for a single FILE or "
947 file->in_name = xstrdup (tokid);
952 mtf_merge_dictionary (mtf.dict, file);
957 if (lex_match (T_BY))
959 struct variable **by;
963 msg (SE, _("BY may appear at most once."));
968 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
969 PV_NO_DUPLICATE | PV_NO_SCRATCH))
972 for (iter = mtf.head; iter != NULL; iter = iter->next)
976 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
978 for (i = 0; i < mtf.by_cnt; i++)
980 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
981 if (iter->by[i] == NULL)
983 msg (SE, _("File %s lacks BY variable %s."),
984 iter->handle ? fh_get_name (iter->handle) : "*",
993 else if (lex_match_id ("FIRST"))
995 if (mtf.first[0] != '\0')
997 msg (SE, _("FIRST may appear at most once."));
1002 if (!lex_force_id ())
1004 strcpy (mtf.first, tokid);
1007 else if (lex_match_id ("LAST"))
1009 if (mtf.last[0] != '\0')
1011 msg (SE, _("LAST may appear at most once."));
1016 if (!lex_force_id ())
1018 strcpy (mtf.last, tokid);
1021 else if (lex_match_id ("MAP"))
1025 else if (lex_match_id ("DROP"))
1027 if (!drop_variables (mtf.dict))
1030 else if (lex_match_id ("KEEP"))
1032 if (!keep_variables (mtf.dict))
1041 if (!lex_match ('/') && token != '.')
1043 lex_end_of_command ();
1048 if (mtf.by_cnt == 0)
1052 msg (SE, _("BY is required when TABLE is specified."));
1057 msg (SE, _("BY is required when IN is specified."));
1062 /* Set up mapping from each file's variables to master
1064 for (iter = mtf.head; iter != NULL; iter = iter->next)
1066 struct dictionary *d = iter->dict;
1069 for (i = 0; i < dict_get_var_cnt (d); i++)
1071 struct variable *v = dict_get_var (d, i);
1072 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1078 /* Add IN variables to master dictionary. */
/* For each input file that named an IN variable, create that
   variable in the master dictionary and give it an F1.0 print and
   write format. */
1079 for (iter = mtf.head; iter != NULL; iter = iter->next)
1080 if (iter->in_name != NULL)
1082 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1083 if (iter->in_var == NULL)
/* Creation failed, so IN_VAR is null here and must not be
   dereferenced: report the requested name, IN_NAME, instead. */
1085 msg (SE, _("IN variable name %s duplicates an "
1086 "existing variable name."),
1087 iter->in_name);
1090 iter->in_var->print = iter->in_var->write
1091 = make_output_format (FMT_F, 1, 0);
1094 /* MATCH FILES performs an n-way merge on all its input files.
1097 1. Read one input record from every input FILE.
1099 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1101 3. Find the FILE input record(s) that have minimum BY
1102 values. Store all the values from these input records into
1105 4. For every TABLE, read another record as long as the BY values
1106 on the TABLE's input record are less than the FILEs' BY values.
1107 If an exact match is found, store all the values from the TABLE
1108 input record into the output record.
1110 5. Write the output record.
1112 6. Read another record from each input file FILE and TABLE that
1113 we stored values from above. If we come to the end of one of the
1114 input files, remove it from the list of input files.
1116 7. Repeat from step 2.
1118 Unfortunately, this algorithm can't be implemented in a
1119 straightforward way because there's no function to read a
1120 record from the active file. Instead, it has to be written
1123 FIXME: For merging large numbers of files (more than 10?) a
1124 better algorithm would use a heap for finding minimum
1127 if (!used_active_file)
1128 discard_variables (current_dataset);
1130 dict_compact_values (mtf.dict);
1131 mtf.output = fastfile_create (dict_get_next_value_idx (mtf.dict));
1132 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1133 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1135 if (!mtf_read_nonactive_records (&mtf))
1138 if (used_active_file)
1140 proc_set_sink (current_dataset,
1141 create_case_sink (&null_sink_class,
1142 dataset_dict (current_dataset), NULL));
1143 ok = procedure (current_dataset,mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1146 ok = mtf_processing_finish (&mtf);
1148 discard_variables (current_dataset);
1150 dict_destroy (dataset_dict (current_dataset));
1151 dataset_set_dict (current_dataset, mtf.dict);
1153 proc_set_source (current_dataset, storage_source_create (mtf.output));
1156 if (!mtf_free (&mtf))
1158 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1162 return CMD_CASCADING_FAILURE;
1165 /* Repeats 2...7 an arbitrary number of times.
   MTF_ is the struct mtf_proc for the MATCH FILES command.  First
   removes the active file from the file list, then keeps calling
   mtf_processing() with a null case for as long as the head of the
   list is a FILE (MTF_FILE) entry. */
1167 mtf_processing_finish (void *mtf_)
1169 struct mtf_proc *mtf = mtf_;
1170 struct mtf_file *iter;
1172 /* Find the active file and delete it. */
/* The active file is the entry whose handle is null. */
1173 for (iter = mtf->head; iter; iter = iter->next)
1174 if (iter->handle == NULL)
1176 if (!mtf_delete_file_in_place (mtf, &iter))
1181 while (mtf->head && mtf->head->type == MTF_FILE)
1182 if (!mtf_processing (NULL, mtf))
1188 /* Return a string in a static buffer describing V's variable type and
1191 var_type_description (struct variable *v)
1193 static char buf[2][32];
1200 if (v->type == NUMERIC)
1201 strcpy (s, "numeric");
1204 assert (v->type == ALPHA);
1205 sprintf (s, "string with width %d", v->width);
1210 /* Closes FILE and frees its associated data.
1211 Returns true if successful, false if an I/O error
1212 occurred on FILE. */
1214 mtf_close_file (struct mtf_file *file)
/* The error status is sampled before the reader is closed.  A file
   with no reader counts as error-free. */
1216 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1218 any_reader_close (file->reader);
/* Only files with a handle have their dictionary destroyed here.
   NOTE(review): a null handle appears to denote the active file,
   whose dictionary is the dataset's own -- confirm. */
1219 if (file->handle != NULL)
1220 dict_destroy (file->dict);
1221 case_destroy (&file->input);
1222 free (file->in_name);
1227 /* Free all the data for the MATCH FILES procedure.
1228 Returns true if successful, false if an I/O error
1231 mtf_free (struct mtf_proc *mtf)
1233 struct mtf_file *iter, *next;
1236 for (iter = mtf->head; iter; iter = next)
1239 assert (iter->dict != mtf->dict);
1240 if (!mtf_close_file (iter))
1245 dict_destroy (mtf->dict);
1246 case_destroy (&mtf->mtf_case);
1247 free (mtf->seq_nums);
1252 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1253 file in the chain, or to NULL if it was the last in the chain.
1254 Returns true if successful, false if an I/O error occurred. */
1256 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1258 struct mtf_file *f = *file;
/* Unlink F from the doubly linked list, updating MTF's head and tail
   pointers as needed.  NOTE(review): the tests that select among
   these four assignments are on lines not visible here. */
1262 f->prev->next = f->next;
1264 f->next->prev = f->prev;
1266 mtf->head = f->next;
1268 mtf->tail = f->prev;
/* F no longer contributes to the output: set its IN variable to 0 in
   the output case. */
1271 if (f->in_var != NULL)
1272 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
/* Reset the output-case values of the master variables that F's
   variables map to; string values are filled with spaces.  The
   numeric branch's assignment is not visible here. */
1273 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1275 struct variable *v = dict_get_var (f->dict, i);
1276 struct variable *mv = get_master (v);
1279 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1281 if (v->type == NUMERIC)
1284 memset (out->s, ' ', v->width);
/* The result of closing F is this function's return value. */
1288 return mtf_close_file (f);
1291 /* Read a record from every input file except the active file.
1292 Returns true if successful, false if an I/O error occurred. */
1294 mtf_read_nonactive_records (void *mtf_)
1296 struct mtf_proc *mtf = mtf_;
1297 struct mtf_file *iter, *next;
/* NEXT holds the successor so the walk can continue after ITER is
   removed from the list; the loop stops early once OK is false. */
1300 for (iter = mtf->head; ok && iter != NULL; iter = next)
/* Files with a handle (that is, not the active file) are read; a
   file that yields no record is removed from the list. */
1303 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1304 if (!mtf_delete_file_in_place (mtf, &iter))
1310 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1311 if A == B, 1 if A > B.
   C is the case used in place of a file's own input record when
   that record is a null case (presumably the current active-file
   case -- confirm against callers). */
1313 mtf_compare_BY_values (struct mtf_proc *mtf,
1314 struct mtf_file *a, struct mtf_file *b,
1315 const struct ccase *c)
1317 const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1318 const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
/* NOTE(review): this assertion tests A and B for null only after
   both have already been dereferenced by the initializers above, so
   it cannot catch a null A or B before they are used. */
1319 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
/* Each file supplies its own BY variable array; MTF supplies the
   number of BY variables. */
1320 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1323 /* Perform one iteration of steps 3...7 above.
1324 Returns true if successful, false if an I/O error occurred. */
1326 mtf_processing (const struct ccase *c, void *mtf_)
1328 struct mtf_proc *mtf = mtf_;
1330 /* Do we need another record from the active file? */
1331 bool read_active_file;
1333 assert (mtf->head != NULL);
1334 if (mtf->head->type == MTF_TABLE)
1339 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1340 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1341 struct mtf_file *iter, *next;
1343 read_active_file = false;
1345 /* 3. Find the FILE input record(s) that have minimum BY
1346 values. Store all the values from these input records into
1347 the output record. */
1348 min_head = min_tail = mtf->head;
1349 max_head = max_tail = NULL;
1350 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1353 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1357 max_tail = max_tail->next_min = iter;
1359 max_head = max_tail = iter;
1362 min_tail = min_tail->next_min = iter;
1367 max_tail->next_min = min_head;
1368 max_tail = min_tail;
1372 max_head = min_head;
1373 max_tail = min_tail;
1375 min_head = min_tail = iter;
1379 /* 4. For every TABLE, read another record as long as the BY
1380 values on the TABLE's input record are less than the FILEs'
1381 BY values. If an exact match is found, store all the values
1382 from the TABLE input record into the output record. */
1383 for (; iter != NULL; iter = next)
1385 assert (iter->type == MTF_TABLE);
1390 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1394 max_tail = max_tail->next_min = iter;
1396 max_head = max_tail = iter;
1399 min_tail = min_tail->next_min = iter;
1402 if (iter->handle == NULL)
1404 if (any_reader_read (iter->reader, &iter->input))
1406 if (!mtf_delete_file_in_place (mtf, &iter))
1413 /* Next sequence number. */
1416 /* Store data to all the records we are using. */
1418 min_tail->next_min = NULL;
1419 for (iter = min_head; iter; iter = iter->next_min)
1423 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1425 struct variable *v = dict_get_var (iter->dict, i);
1426 struct variable *mv = get_master (v);
1428 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1430 const struct ccase *record
1431 = case_is_null (&iter->input) ? c : &iter->input;
1432 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1434 mtf->seq_nums[mv->index] = mtf->seq_num;
1435 if (v->type == NUMERIC)
1436 out->f = case_num (record, v->fv);
1438 memcpy (out->s, case_str (record, v->fv), v->width);
1441 if (iter->in_var != NULL)
1442 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1444 if (iter->type == MTF_FILE && iter->handle == NULL)
1445 read_active_file = true;
1448 /* Store missing values to all the records we're not
1451 max_tail->next_min = NULL;
1452 for (iter = max_head; iter; iter = iter->next_min)
1456 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1458 struct variable *v = dict_get_var (iter->dict, i);
1459 struct variable *mv = get_master (v);
1461 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1463 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1464 mtf->seq_nums[mv->index] = mtf->seq_num;
1466 if (v->type == NUMERIC)
1469 memset (out->s, ' ', v->width);
1472 if (iter->in_var != NULL)
1473 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1476 /* 5. Write the output record. */
1477 casefile_append (mtf->output, &mtf->mtf_case);
1479 /* 6. Read another record from each input file FILE and TABLE
1480 that we stored values from above. If we come to the end of
1481 one of the input files, remove it from the list of input
1483 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1485 next = iter->next_min;
1486 if (iter->reader != NULL
1487 && !any_reader_read (iter->reader, &iter->input))
1488 if (!mtf_delete_file_in_place (mtf, &iter))
1492 while (!read_active_file
1493 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1498 /* Merge the dictionary for file F into master dictionary M.
   Copies F's file label and documents into M where M lacks them
   (documents are concatenated when both have some), then walks F's
   variables: a variable already in M must have the same width, and
   may contribute value labels, missing values, and a variable label
   that M's copy lacks; a variable not yet in M is cloned into it.
   NOTE(review): return statements and some branch lines are not
   visible here -- confirm the exact control flow. */
1500 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1502 struct dictionary *d = f->dict;
1503 const char *d_docs, *m_docs;
/* The first file with a label supplies the master file label. */
1506 if (dict_get_label (m) == NULL)
1507 dict_set_label (m, dict_get_label (d));
1509 d_docs = dict_get_documents (d);
1510 m_docs = dict_get_documents (m);
1514 dict_set_documents (m, d_docs);
/* Both dictionaries have documents: append D's to M's. */
1520 new_len = strlen (m_docs) + strlen (d_docs);
1521 new_docs = xmalloc (new_len + 1);
1522 strcpy (new_docs, m_docs);
1523 strcat (new_docs, d_docs);
1524 dict_set_documents (m, new_docs);
1529 for (i = 0; i < dict_get_var_cnt (d); i++)
1531 struct variable *dv = dict_get_var (d, i);
1532 struct variable *mv = dict_lookup_var (m, dv->name);
/* Scratch variables get special treatment (on a line not visible
   here). */
1534 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1539 if (mv->width != dv->width)
/* The active file has a null handle, so it is reported as "*", the
   same way the BY-variable check in cmd_match_files() names it,
   instead of passing a null handle to fh_get_name(). */
1541 msg (SE, _("Variable %s in file %s (%s) has different "
1542 "type or width from the same variable in "
1543 "earlier file (%s)."),
1544 dv->name, f->handle ? fh_get_name (f->handle) : "*",
1545 var_type_description (dv), var_type_description (mv));
1549 if (dv->width == mv->width)
/* Fill in value labels and missing values only where the master
   variable has none. */
1551 if (val_labs_count (dv->val_labs)
1552 && !val_labs_count (mv->val_labs))
1554 val_labs_destroy (mv->val_labs);
1555 mv->val_labs = val_labs_copy (dv->val_labs);
1557 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1558 mv_copy (&mv->miss, &dv->miss);
1561 if (dv->label && !mv->label)
1562 mv->label = xstrdup (dv->label);
/* Variable not yet in the master dictionary: clone it under the
   same name. */
1565 mv = dict_clone_var_assert (m, dv, dv->name);
1571 /* Marks V's master variable as MASTER.
   The association is stored in V's aux data with no destructor;
   get_master() is the matching accessor. */
1573 set_master (struct variable *v, struct variable *master)
1575 var_attach_aux (v, master, NULL);
1578 /* Returns the master variable corresponding to V,
1579 as set with set_master(). */
1580 static struct variable *
1581 get_master (struct variable *v)
1590 A case map copies data from a case that corresponds for one
1591 dictionary to a case that corresponds to a second dictionary
1592 derived from the first by, optionally, deleting, reordering,
1593 or renaming variables. (No new variables may be created.)
1599 size_t value_cnt; /* Number of values in map. */
1600 int *map; /* For each destination index, the
1601 corresponding source index. */
1604 /* Prepares dictionary D for producing a case map. Afterward,
1605 the caller may delete, reorder, or rename variables within D
1606 at will before using finish_case_map() to produce the case map.
1609 Uses D's aux members, which must otherwise not be in use. */
1611 start_case_map (struct dictionary *d)
1613 size_t var_cnt = dict_get_var_cnt (d);
/* Attach a heap-allocated int to every variable as aux data, with
   var_dtor_free as its destructor.  finish_case_map() later compares
   this value with the variable's fv, so it presumably records the
   variable's original fv (the assignment is on a line not visible
   here -- confirm). */
1616 for (i = 0; i < var_cnt; i++)
1618 struct variable *v = dict_get_var (d, i);
1619 int *src_fv = xmalloc (sizeof *src_fv);
1621 var_attach_aux (v, src_fv, var_dtor_free);
1625 /* Produces a case map from dictionary D, which must have been
1626 previously prepared with start_case_map().
1628 Does not retain any reference to D, and clears the aux members
1629 set up by start_case_map().
1631 Returns the new case map, or a null pointer if no mapping is
1632 required (that is, no data has changed position). */
1633 static struct case_map *
1634 finish_case_map (struct dictionary *d)
1636 struct case_map *map;
1637 size_t var_cnt = dict_get_var_cnt (d);
/* One map entry per value in D.  The assertion further down expects
   every entry to be -1 before it is assigned, so this first loop
   presumably initializes the entries to -1 (body not visible here). */
1641 map = xmalloc (sizeof *map);
1642 map->value_cnt = dict_get_next_value_idx (d);
1643 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1644 for (i = 0; i < map->value_cnt; i++)
/* For each variable, detach the source index saved by
   start_case_map() and record, for each of the variable's NV values,
   which source index feeds which destination index. */
1648 for (i = 0; i < var_cnt; i++)
1650 struct variable *v = dict_get_var (d, i);
1651 int *src_fv = (int *) var_detach_aux (v);
/* A variable whose fv differs from its saved value has moved. */
1654 if (v->fv != *src_fv)
1657 for (idx = 0; idx < v->nv; idx++)
1659 int src_idx = *src_fv + idx;
1660 int dst_idx = v->fv + idx;
/* Each destination slot may be assigned only once. */
1662 assert (map->map[dst_idx] == -1);
1663 map->map[dst_idx] = src_idx;
/* Presumably the no-mapping-required case: the map is destroyed
   (and, per the header comment, a null pointer returned). */
1670 destroy_case_map (map);
/* Loop while the last map entry is unassigned (-1); presumably trims
   those trailing entries (body not visible here). */
1674 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1680 /* Maps from SRC to DST, applying case map MAP.
   MAP, SRC, and DST must all be non-null, and SRC and DST must be
   distinct cases. */
1682 map_case (const struct case_map *map,
1683 const struct ccase *src, struct ccase *dst)
1687 assert (map != NULL);
1688 assert (src != NULL);
1689 assert (dst != NULL);
1690 assert (src != dst);
/* For each destination index, copy the value at the corresponding
   source index.  NOTE(review): a guard on SRC_IDX (for example to
   skip unassigned entries) may sit on a line not visible here. */
1692 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1694 int src_idx = map->map[dst_idx];
1696 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1700 /* Destroys case map MAP. */
1702 destroy_case_map (struct case_map *map)