1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/dictionary.h>
30 #include <data/por-file-writer.h>
31 #include <data/procedure.h>
32 #include <data/settings.h>
33 #include <data/storage-stream.h>
34 #include <data/sys-file-writer.h>
35 #include <data/transformations.h>
36 #include <data/value-labels.h>
37 #include <data/variable.h>
38 #include <language/command.h>
39 #include <language/data-io/file-handle.h>
40 #include <language/lexer/lexer.h>
41 #include <libpspp/alloc.h>
42 #include <libpspp/compiler.h>
43 #include <libpspp/hash.h>
44 #include <libpspp/message.h>
45 #include <libpspp/message.h>
46 #include <libpspp/misc.h>
47 #include <libpspp/str.h>
50 #define _(msgid) gettext (msgid)
52 /* Rearranging and reducing a dictionary. */
53 static void start_case_map (struct dictionary *);
54 static struct case_map *finish_case_map (struct dictionary *);
55 static void map_case (const struct case_map *,
56 const struct ccase *, struct ccase *);
57 static void destroy_case_map (struct case_map *);
59 static bool parse_dict_trim (struct dictionary *);
61 /* Reading system and portable files. */
63 /* Type of command. */
70 /* Case reader input program. */
71 struct case_reader_pgm
73 struct any_reader *reader; /* File reader. */
74 struct case_map *map; /* Map from file dict to active file dict. */
75 struct ccase bounce; /* Bounce buffer. */
78 static const struct case_source_class case_reader_source_class;
80 static void case_reader_pgm_free (struct case_reader_pgm *);
82 /* Parses a GET or IMPORT command. */
84 parse_read_command (enum reader_command type)
86 struct case_reader_pgm *pgm = NULL;
87 struct file_handle *fh = NULL;
88 struct dictionary *dict = NULL;
94 if (lex_match_id ("FILE") || token == T_STRING)
98 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
102 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
106 if (lex_match_id ("COMM"))
108 else if (lex_match_id ("TAPE"))
112 lex_error (_("expecting COMM or TAPE"));
122 lex_sbc_missing ("FILE");
126 discard_variables ();
128 pgm = xmalloc (sizeof *pgm);
129 pgm->reader = any_reader_open (fh, &dict);
131 case_nullify (&pgm->bounce);
132 if (pgm->reader == NULL)
135 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
137 start_case_map (dict);
142 if (!parse_dict_trim (dict))
146 pgm->map = finish_case_map (dict);
148 dict_destroy (default_dict);
151 proc_set_source (create_case_source (&case_reader_source_class, pgm));
156 case_reader_pgm_free (pgm);
159 return CMD_CASCADING_FAILURE;
162 /* Frees a struct case_reader_pgm. */
164 case_reader_pgm_free (struct case_reader_pgm *pgm)
168 any_reader_close (pgm->reader);
169 destroy_case_map (pgm->map);
170 case_destroy (&pgm->bounce);
175 /* Clears internal state related to case reader input procedure. */
177 case_reader_source_destroy (struct case_source *source)
179 struct case_reader_pgm *pgm = source->aux;
180 case_reader_pgm_free (pgm);
183 /* Reads all the cases from the data file into C and passes them
184 to WRITE_CASE one by one, passing WC_DATA.
185 Returns true if successful, false if an I/O error occurred. */
187 case_reader_source_read (struct case_source *source,
189 write_case_func *write_case, write_case_data wc_data)
191 struct case_reader_pgm *pgm = source->aux;
197 if (pgm->map == NULL)
198 got_case = any_reader_read (pgm->reader, c);
201 got_case = any_reader_read (pgm->reader, &pgm->bounce);
203 map_case (pgm->map, &pgm->bounce, c);
208 ok = write_case (wc_data);
212 return ok && !any_reader_error (pgm->reader);
215 static const struct case_source_class case_reader_source_class =
219 case_reader_source_read,
220 case_reader_source_destroy,
227 return parse_read_command (GET_CMD);
234 return parse_read_command (IMPORT_CMD);
237 /* Writing system and portable files. */
239 /* Type of output file. */
242 SYSFILE_WRITER, /* System file. */
243 PORFILE_WRITER /* Portable file. */
246 /* Type of a command. */
249 XFORM_CMD, /* Transformation. */
250 PROC_CMD /* Procedure. */
253 /* File writer plus a case map. */
256 struct any_writer *writer; /* File writer. */
257 struct case_map *map; /* Map to output file dictionary
258 (null pointer for identity mapping). */
259 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
264 case_writer_destroy (struct case_writer *aw)
269 ok = any_writer_close (aw->writer);
270 destroy_case_map (aw->map);
271 case_destroy (&aw->bounce);
277 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
278 WRITER_TYPE identifies the type of file to write,
279 and COMMAND_TYPE identifies the type of command.
281 On success, returns a writer.
282 For procedures only, sets *RETAIN_UNSELECTED to true if cases
283 that would otherwise be excluded by FILTER or USE should be
286 On failure, returns a null pointer. */
287 static struct case_writer *
288 parse_write_command (enum writer_type writer_type,
289 enum command_type command_type,
290 bool *retain_unselected)
293 struct file_handle *handle; /* Output file. */
294 struct dictionary *dict; /* Dictionary for output file. */
295 struct case_writer *aw; /* Writer. */
297 /* Common options. */
298 bool print_map; /* Print map? TODO. */
299 bool print_short_names; /* Print long-to-short name map. TODO. */
300 struct sfm_write_options sysfile_opts;
301 struct pfm_write_options porfile_opts;
303 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
304 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
305 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
307 if (command_type == PROC_CMD)
308 *retain_unselected = true;
311 dict = dict_clone (default_dict);
312 aw = xmalloc (sizeof *aw);
315 case_nullify (&aw->bounce);
317 print_short_names = false;
318 sysfile_opts = sfm_writer_default_options ();
319 porfile_opts = pfm_writer_default_options ();
321 start_case_map (dict);
322 dict_delete_scratch_vars (dict);
327 if (lex_match_id ("OUTFILE"))
331 lex_sbc_only_once ("OUTFILE");
337 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
341 else if (lex_match_id ("NAMES"))
342 print_short_names = true;
343 else if (lex_match_id ("PERMISSIONS"))
348 if (lex_match_id ("READONLY"))
350 else if (lex_match_id ("WRITEABLE"))
354 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
357 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
359 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
362 if (lex_match_id ("RETAIN"))
363 *retain_unselected = true;
364 else if (lex_match_id ("DELETE"))
365 *retain_unselected = false;
368 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
372 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
373 sysfile_opts.compress = true;
374 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
375 sysfile_opts.compress = false;
376 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
379 if (!lex_force_int ())
381 sysfile_opts.version = lex_integer ();
384 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
387 if (lex_match_id ("COMMUNICATIONS"))
388 porfile_opts.type = PFM_COMM;
389 else if (lex_match_id ("TAPE"))
390 porfile_opts.type = PFM_TAPE;
393 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
397 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
400 if (!lex_force_int ())
402 porfile_opts.digits = lex_integer ();
405 else if (!parse_dict_trim (dict))
408 if (!lex_match ('/'))
411 if (lex_end_of_command () != CMD_SUCCESS)
416 lex_sbc_missing ("OUTFILE");
420 dict_compact_values (dict);
421 aw->map = finish_case_map (dict);
423 case_create (&aw->bounce, dict_get_next_value_idx (dict));
425 if (fh_get_referent (handle) == FH_REF_FILE)
430 aw->writer = any_writer_from_sfm_writer (
431 sfm_open_writer (handle, dict, sysfile_opts));
434 aw->writer = any_writer_from_pfm_writer (
435 pfm_open_writer (handle, dict, porfile_opts));
440 aw->writer = any_writer_open (handle, dict);
446 case_writer_destroy (aw);
451 /* Writes case C to writer AW. */
453 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
457 map_case (aw->map, c, &aw->bounce);
460 return any_writer_write (aw->writer, c);
463 /* SAVE and EXPORT. */
465 static bool output_proc (const struct ccase *, void *);
467 /* Parses and performs the SAVE or EXPORT procedure. */
469 parse_output_proc (enum writer_type writer_type)
471 bool retain_unselected;
472 struct variable *saved_filter_variable;
473 struct case_writer *aw;
476 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
478 return CMD_CASCADING_FAILURE;
480 saved_filter_variable = dict_get_filter (default_dict);
481 if (retain_unselected)
482 dict_set_filter (default_dict, NULL);
483 ok = procedure (output_proc, aw);
484 dict_set_filter (default_dict, saved_filter_variable);
486 case_writer_destroy (aw);
487 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
490 /* Writes case C to file. */
492 output_proc (const struct ccase *c, void *aw_)
494 struct case_writer *aw = aw_;
495 return case_writer_write_case (aw, c);
501 return parse_output_proc (SYSFILE_WRITER);
507 return parse_output_proc (PORFILE_WRITER);
510 /* XSAVE and XEXPORT. */
512 /* Transformation. */
515 struct case_writer *aw; /* Writer. */
518 static trns_proc_func output_trns_proc;
519 static trns_free_func output_trns_free;
521 /* Parses the XSAVE or XEXPORT transformation command. */
523 parse_output_trns (enum writer_type writer_type)
525 struct output_trns *t = xmalloc (sizeof *t);
526 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
530 return CMD_CASCADING_FAILURE;
533 add_transformation (output_trns_proc, output_trns_free, t);
537 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
539 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
541 struct output_trns *t = trns_;
542 case_writer_write_case (t->aw, c);
543 return TRNS_CONTINUE;
546 /* Frees an XSAVE or XEXPORT transformation.
547 Returns true if successful, false if an I/O error occurred. */
549 output_trns_free (void *trns_)
551 struct output_trns *t = trns_;
556 ok = case_writer_destroy (t->aw);
566 return parse_output_trns (SYSFILE_WRITER);
569 /* XEXPORT command. */
573 return parse_output_trns (PORFILE_WRITER);
576 static bool rename_variables (struct dictionary *dict);
577 static bool drop_variables (struct dictionary *dict);
578 static bool keep_variables (struct dictionary *dict);
580 /* Commands that read and write system files share a great deal
581 of common syntactic structure for rearranging and dropping
582 variables. This function parses this syntax and modifies DICT
583 appropriately. Returns true on success, false on failure. */
585 parse_dict_trim (struct dictionary *dict)
587 if (lex_match_id ("MAP"))
592 else if (lex_match_id ("DROP"))
593 return drop_variables (dict);
594 else if (lex_match_id ("KEEP"))
595 return keep_variables (dict);
596 else if (lex_match_id ("RENAME"))
597 return rename_variables (dict);
600 lex_error (_("expecting a valid subcommand"));
605 /* Parses and performs the RENAME subcommand of GET and SAVE. */
607 rename_variables (struct dictionary *dict)
625 v = parse_dict_variable (dict);
628 if (!lex_force_match ('=')
631 if (dict_lookup_var (dict, tokid) != NULL)
633 msg (SE, _("Cannot rename %s as %s because there already exists "
634 "a variable named %s. To rename variables with "
635 "overlapping names, use a single RENAME subcommand "
636 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
637 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
641 dict_rename_var (dict, v, tokid);
650 while (lex_match ('('))
654 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
656 if (!lex_match ('='))
658 msg (SE, _("`=' expected after variable list."));
661 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
665 msg (SE, _("Number of variables on left side of `=' (%d) does not "
666 "match number of variables on right side (%d), in "
667 "parenthesized group %d of RENAME subcommand."),
668 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
671 if (!lex_force_match (')'))
676 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
678 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
684 for (i = 0; i < nn; i++)
692 /* Parses and performs the DROP subcommand of GET and SAVE.
693 Returns true if successful, false on failure.*/
695 drop_variables (struct dictionary *dict)
701 if (!parse_variables (dict, &v, &nv, PV_NONE))
703 dict_delete_vars (dict, v, nv);
706 if (dict_get_var_cnt (dict) == 0)
708 msg (SE, _("Cannot DROP all variables from dictionary."));
714 /* Parses and performs the KEEP subcommand of GET and SAVE.
715 Returns true if successful, false on failure.*/
717 keep_variables (struct dictionary *dict)
724 if (!parse_variables (dict, &v, &nv, PV_NONE))
727 /* Move the specified variables to the beginning. */
728 dict_reorder_vars (dict, v, nv);
730 /* Delete the remaining variables. */
731 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
732 for (i = nv; i < dict_get_var_cnt (dict); i++)
733 v[i - nv] = dict_get_var (dict, i);
734 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
745 MTF_FILE, /* Specified on FILE= subcommand. */
746 MTF_TABLE /* Specified on TABLE= subcommand. */
749 /* One of the files on MATCH FILES. */
752 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
753 struct mtf_file *next_min; /* Next in the chain of minimums. */
755 int type; /* One of MTF_*. */
756 struct variable **by; /* List of BY variables for this file. */
757 struct file_handle *handle; /* File handle. */
758 struct any_reader *reader; /* File reader. */
759 struct dictionary *dict; /* Dictionary from system file. */
762 char *in_name; /* Variable name. */
763 struct variable *in_var; /* Variable (in master dictionary). */
765 struct ccase input; /* Input record. */
768 /* MATCH FILES procedure. */
771 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
772 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
774 bool ok; /* False if I/O error occurs. */
776 size_t by_cnt; /* Number of variables on BY subcommand. */
778 /* Names of FIRST, LAST variables. */
779 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
781 struct dictionary *dict; /* Dictionary of output file. */
782 struct case_sink *sink; /* Sink to receive output. */
783 struct ccase mtf_case; /* Case used for output. */
785 unsigned seq_num; /* Have we initialized this variable? */
786 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
789 static bool mtf_free (struct mtf_proc *);
790 static bool mtf_close_file (struct mtf_file *);
791 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
792 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
794 static bool mtf_read_nonactive_records (void *);
795 static bool mtf_processing_finish (void *);
796 static bool mtf_processing (const struct ccase *, void *);
798 static char *var_type_description (struct variable *);
800 static void set_master (struct variable *, struct variable *master);
801 static struct variable *get_master (struct variable *);
803 /* Parse and execute the MATCH FILES command. */
805 cmd_match_files (void)
808 struct mtf_file *first_table = NULL;
809 struct mtf_file *iter;
811 bool used_active_file = false;
812 bool saw_table = false;
817 mtf.head = mtf.tail = NULL;
821 mtf.dict = dict_create ();
823 case_nullify (&mtf.mtf_case);
826 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
830 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
832 struct mtf_file *file = xmalloc (sizeof *file);
834 if (lex_match_id ("FILE"))
835 file->type = MTF_FILE;
836 else if (lex_match_id ("TABLE"))
838 file->type = MTF_TABLE;
849 file->in_name = NULL;
851 case_nullify (&file->input);
853 /* FILEs go first, then TABLEs. */
854 if (file->type == MTF_TABLE || first_table == NULL)
857 file->prev = mtf.tail;
859 mtf.tail->next = file;
861 if (mtf.head == NULL)
863 if (file->type == MTF_TABLE && first_table == NULL)
868 assert (file->type == MTF_FILE);
869 file->next = first_table;
870 file->prev = first_table->prev;
871 if (first_table->prev)
872 first_table->prev->next = file;
875 first_table->prev = file;
883 if (used_active_file)
885 msg (SE, _("The active file may not be specified more "
889 used_active_file = true;
891 if (!proc_has_source ())
893 msg (SE, _("Cannot specify the active file since no active "
894 "file has been defined."));
898 if (proc_make_temporary_transformations_permanent ())
900 _("MATCH FILES may not be used after TEMPORARY when "
901 "the active file is an input source. "
902 "Temporary transformations will be made permanent."));
904 file->dict = default_dict;
908 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
909 if (file->handle == NULL)
912 file->reader = any_reader_open (file->handle, &file->dict);
913 if (file->reader == NULL)
916 case_create (&file->input, dict_get_next_value_idx (file->dict));
919 while (lex_match ('/'))
920 if (lex_match_id ("RENAME"))
922 if (!rename_variables (file->dict))
925 else if (lex_match_id ("IN"))
934 if (file->in_name != NULL)
936 msg (SE, _("Multiple IN subcommands for a single FILE or "
940 file->in_name = xstrdup (tokid);
945 mtf_merge_dictionary (mtf.dict, file);
950 if (lex_match (T_BY))
952 struct variable **by;
956 msg (SE, _("BY may appear at most once."));
961 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
962 PV_NO_DUPLICATE | PV_NO_SCRATCH))
965 for (iter = mtf.head; iter != NULL; iter = iter->next)
969 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
971 for (i = 0; i < mtf.by_cnt; i++)
973 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
974 if (iter->by[i] == NULL)
976 msg (SE, _("File %s lacks BY variable %s."),
977 iter->handle ? fh_get_name (iter->handle) : "*",
986 else if (lex_match_id ("FIRST"))
988 if (mtf.first[0] != '\0')
990 msg (SE, _("FIRST may appear at most once."));
995 if (!lex_force_id ())
997 strcpy (mtf.first, tokid);
1000 else if (lex_match_id ("LAST"))
1002 if (mtf.last[0] != '\0')
1004 msg (SE, _("LAST may appear at most once."));
1009 if (!lex_force_id ())
1011 strcpy (mtf.last, tokid);
1014 else if (lex_match_id ("MAP"))
1018 else if (lex_match_id ("DROP"))
1020 if (!drop_variables (mtf.dict))
1023 else if (lex_match_id ("KEEP"))
1025 if (!keep_variables (mtf.dict))
1034 if (!lex_match ('/') && token != '.')
1036 lex_end_of_command ();
1041 if (mtf.by_cnt == 0)
1045 msg (SE, _("BY is required when TABLE is specified."));
1050 msg (SE, _("BY is required when IN is specified."));
1055 /* Set up mapping from each file's variables to master
1057 for (iter = mtf.head; iter != NULL; iter = iter->next)
1059 struct dictionary *d = iter->dict;
1062 for (i = 0; i < dict_get_var_cnt (d); i++)
1064 struct variable *v = dict_get_var (d, i);
1065 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1071 /* Add IN variables to master dictionary. */
1072 for (iter = mtf.head; iter != NULL; iter = iter->next)
1073 if (iter->in_name != NULL)
1075 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1076 if (iter->in_var == NULL)
1078 msg (SE, _("IN variable name %s duplicates an "
1079 "existing variable name."),
1080 iter->in_var->name);
1083 iter->in_var->print = iter->in_var->write
1084 = make_output_format (FMT_F, 1, 0);
1087 /* MATCH FILES performs an n-way merge on all its input files.
1090 1. Read one input record from every input FILE.
1092 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1094 3. Find the FILE input record(s) that have minimum BY
1095 values. Store all the values from these input records into
1098 4. For every TABLE, read another record as long as the BY values
1099 on the TABLE's input record are less than the FILEs' BY values.
1100 If an exact match is found, store all the values from the TABLE
1101 input record into the output record.
1103 5. Write the output record.
1105 6. Read another record from each input file FILE and TABLE that
1106 we stored values from above. If we come to the end of one of the
1107 input files, remove it from the list of input files.
1109 7. Repeat from step 2.
1111 Unfortunately, this algorithm can't be implemented in a
1112 straightforward way because there's no function to read a
1113 record from the active file. Instead, it has to be written
1116 FIXME: For merging large numbers of files (more than 10?) a
1117 better algorithm would use a heap for finding minimum
1120 if (!used_active_file)
1121 discard_variables ();
1123 dict_compact_values (mtf.dict);
1124 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1125 if (mtf.sink->class->open != NULL)
1126 mtf.sink->class->open (mtf.sink);
1128 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1129 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1131 if (!mtf_read_nonactive_records (&mtf))
1134 if (used_active_file)
1136 proc_set_sink (create_case_sink (&null_sink_class, default_dict, NULL));
1137 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1140 ok = mtf_processing_finish (&mtf);
1142 discard_variables ();
1144 default_dict = mtf.dict;
1146 proc_set_source (mtf.sink->class->make_source (mtf.sink));
1147 free_case_sink (mtf.sink);
1149 if (!mtf_free (&mtf))
1151 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1155 return CMD_CASCADING_FAILURE;
1158 /* Repeats 2...7 an arbitrary number of times. */
1160 mtf_processing_finish (void *mtf_)
1162 struct mtf_proc *mtf = mtf_;
1163 struct mtf_file *iter;
1165 /* Find the active file and delete it. */
1166 for (iter = mtf->head; iter; iter = iter->next)
1167 if (iter->handle == NULL)
1169 if (!mtf_delete_file_in_place (mtf, &iter))
1174 while (mtf->head && mtf->head->type == MTF_FILE)
1175 if (!mtf_processing (NULL, mtf))
1181 /* Return a string in a static buffer describing V's variable type and
1184 var_type_description (struct variable *v)
1186 static char buf[2][32];
1193 if (v->type == NUMERIC)
1194 strcpy (s, "numeric");
1197 assert (v->type == ALPHA);
1198 sprintf (s, "string with width %d", v->width);
1203 /* Closes FILE and frees its associated data.
1204 Returns true if successful, false if an I/O error
1205 occurred on FILE. */
1207 mtf_close_file (struct mtf_file *file)
1209 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1211 any_reader_close (file->reader);
1212 if (file->handle != NULL)
1213 dict_destroy (file->dict);
1214 case_destroy (&file->input);
1215 free (file->in_name);
1220 /* Free all the data for the MATCH FILES procedure.
1221 Returns true if successful, false if an I/O error
1224 mtf_free (struct mtf_proc *mtf)
1226 struct mtf_file *iter, *next;
1229 for (iter = mtf->head; iter; iter = next)
1232 assert (iter->dict != mtf->dict);
1233 if (!mtf_close_file (iter))
1238 dict_destroy (mtf->dict);
1239 case_destroy (&mtf->mtf_case);
1240 free (mtf->seq_nums);
1245 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1246 file in the chain, or to NULL if was the last in the chain.
1247 Returns true if successful, false if an I/O error occurred. */
1249 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1251 struct mtf_file *f = *file;
1255 f->prev->next = f->next;
1257 f->next->prev = f->prev;
1259 mtf->head = f->next;
1261 mtf->tail = f->prev;
1264 if (f->in_var != NULL)
1265 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1266 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1268 struct variable *v = dict_get_var (f->dict, i);
1269 struct variable *mv = get_master (v);
1272 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1274 if (v->type == NUMERIC)
1277 memset (out->s, ' ', v->width);
1281 return mtf_close_file (f);
1284 /* Read a record from every input file except the active file.
1285 Returns true if successful, false if an I/O error occurred. */
1287 mtf_read_nonactive_records (void *mtf_)
1289 struct mtf_proc *mtf = mtf_;
1290 struct mtf_file *iter, *next;
1293 for (iter = mtf->head; ok && iter != NULL; iter = next)
1296 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1297 if (!mtf_delete_file_in_place (mtf, &iter))
1303 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1304 if A == B, 1 if A > B. */
1306 mtf_compare_BY_values (struct mtf_proc *mtf,
1307 struct mtf_file *a, struct mtf_file *b,
1308 const struct ccase *c)
1310 const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1311 const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1312 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1313 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1316 /* Perform one iteration of steps 3...7 above.
1317 Returns true if successful, false if an I/O error occurred. */
1319 mtf_processing (const struct ccase *c, void *mtf_)
1321 struct mtf_proc *mtf = mtf_;
1323 /* Do we need another record from the active file? */
1324 bool read_active_file;
1326 assert (mtf->head != NULL);
1327 if (mtf->head->type == MTF_TABLE)
1332 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1333 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1334 struct mtf_file *iter, *next;
1336 read_active_file = false;
1338 /* 3. Find the FILE input record(s) that have minimum BY
1339 values. Store all the values from these input records into
1340 the output record. */
1341 min_head = min_tail = mtf->head;
1342 max_head = max_tail = NULL;
1343 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1346 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1350 max_tail = max_tail->next_min = iter;
1352 max_head = max_tail = iter;
1355 min_tail = min_tail->next_min = iter;
1360 max_tail->next_min = min_head;
1361 max_tail = min_tail;
1365 max_head = min_head;
1366 max_tail = min_tail;
1368 min_head = min_tail = iter;
1372 /* 4. For every TABLE, read another record as long as the BY
1373 values on the TABLE's input record are less than the FILEs'
1374 BY values. If an exact match is found, store all the values
1375 from the TABLE input record into the output record. */
1376 for (; iter != NULL; iter = next)
1378 assert (iter->type == MTF_TABLE);
1383 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1387 max_tail = max_tail->next_min = iter;
1389 max_head = max_tail = iter;
1392 min_tail = min_tail->next_min = iter;
1395 if (iter->handle == NULL)
1397 if (any_reader_read (iter->reader, &iter->input))
1399 if (!mtf_delete_file_in_place (mtf, &iter))
1406 /* Next sequence number. */
1409 /* Store data to all the records we are using. */
1411 min_tail->next_min = NULL;
1412 for (iter = min_head; iter; iter = iter->next_min)
1416 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1418 struct variable *v = dict_get_var (iter->dict, i);
1419 struct variable *mv = get_master (v);
1421 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1423 const struct ccase *record
1424 = case_is_null (&iter->input) ? c : &iter->input;
1425 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1427 mtf->seq_nums[mv->index] = mtf->seq_num;
1428 if (v->type == NUMERIC)
1429 out->f = case_num (record, v->fv);
1431 memcpy (out->s, case_str (record, v->fv), v->width);
1434 if (iter->in_var != NULL)
1435 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1437 if (iter->type == MTF_FILE && iter->handle == NULL)
1438 read_active_file = true;
1441 /* Store missing values to all the records we're not
1444 max_tail->next_min = NULL;
1445 for (iter = max_head; iter; iter = iter->next_min)
1449 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1451 struct variable *v = dict_get_var (iter->dict, i);
1452 struct variable *mv = get_master (v);
1454 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1456 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1457 mtf->seq_nums[mv->index] = mtf->seq_num;
1459 if (v->type == NUMERIC)
1462 memset (out->s, ' ', v->width);
1465 if (iter->in_var != NULL)
1466 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1469 /* 5. Write the output record. */
1470 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1472 /* 6. Read another record from each input file FILE and TABLE
1473 that we stored values from above. If we come to the end of
1474 one of the input files, remove it from the list of input
1476 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1478 next = iter->next_min;
1479 if (iter->reader != NULL
1480 && !any_reader_read (iter->reader, &iter->input))
1481 if (!mtf_delete_file_in_place (mtf, &iter))
1485 while (!read_active_file
1486 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1491 /* Merge the dictionary for file F into master dictionary M. */
1493 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1495 struct dictionary *d = f->dict;
1496 const char *d_docs, *m_docs;
1499 if (dict_get_label (m) == NULL)
1500 dict_set_label (m, dict_get_label (d));
1502 d_docs = dict_get_documents (d);
1503 m_docs = dict_get_documents (m);
1507 dict_set_documents (m, d_docs);
1513 new_len = strlen (m_docs) + strlen (d_docs);
1514 new_docs = xmalloc (new_len + 1);
1515 strcpy (new_docs, m_docs);
1516 strcat (new_docs, d_docs);
1517 dict_set_documents (m, new_docs);
1522 for (i = 0; i < dict_get_var_cnt (d); i++)
1524 struct variable *dv = dict_get_var (d, i);
1525 struct variable *mv = dict_lookup_var (m, dv->name);
1527 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1532 if (mv->width != dv->width)
1534 msg (SE, _("Variable %s in file %s (%s) has different "
1535 "type or width from the same variable in "
1536 "earlier file (%s)."),
1537 dv->name, fh_get_name (f->handle),
1538 var_type_description (dv), var_type_description (mv));
1542 if (dv->width == mv->width)
1544 if (val_labs_count (dv->val_labs)
1545 && !val_labs_count (mv->val_labs))
1546 mv->val_labs = val_labs_copy (dv->val_labs);
1547 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1548 mv_copy (&mv->miss, &dv->miss);
1551 if (dv->label && !mv->label)
1552 mv->label = xstrdup (dv->label);
1555 mv = dict_clone_var_assert (m, dv, dv->name);
1561 /* Marks V's master variable as MASTER. */
1563 set_master (struct variable *v, struct variable *master)
1565 var_attach_aux (v, master, NULL);
1568 /* Returns the master variable corresponding to V,
1569 as set with set_master(). */
1570 static struct variable *
1571 get_master (struct variable *v)
1580 A case map copies data from a case that corresponds for one
1581 dictionary to a case that corresponds to a second dictionary
1582 derived from the first by, optionally, deleting, reordering,
1583 or renaming variables. (No new variables may be created.)
1589 size_t value_cnt; /* Number of values in map. */
1590 int *map; /* For each destination index, the
1591 corresponding source index. */
1594 /* Prepares dictionary D for producing a case map. Afterward,
1595 the caller may delete, reorder, or rename variables within D
1596 at will before using finish_case_map() to produce the case
1599 Uses D's aux members, which must otherwise not be in use. */
1601 start_case_map (struct dictionary *d)
1603 size_t var_cnt = dict_get_var_cnt (d);
1606 for (i = 0; i < var_cnt; i++)
1608 struct variable *v = dict_get_var (d, i);
1609 int *src_fv = xmalloc (sizeof *src_fv);
1611 var_attach_aux (v, src_fv, var_dtor_free);
1615 /* Produces a case map from dictionary D, which must have been
1616 previously prepared with start_case_map().
1618 Does not retain any reference to D, and clears the aux members
1619 set up by start_case_map().
1621 Returns the new case map, or a null pointer if no mapping is
1622 required (that is, no data has changed position). */
1623 static struct case_map *
1624 finish_case_map (struct dictionary *d)
1626 struct case_map *map;
1627 size_t var_cnt = dict_get_var_cnt (d);
1631 map = xmalloc (sizeof *map);
1632 map->value_cnt = dict_get_next_value_idx (d);
1633 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1634 for (i = 0; i < map->value_cnt; i++)
1638 for (i = 0; i < var_cnt; i++)
1640 struct variable *v = dict_get_var (d, i);
1641 int *src_fv = (int *) var_detach_aux (v);
1644 if (v->fv != *src_fv)
1647 for (idx = 0; idx < v->nv; idx++)
1649 int src_idx = *src_fv + idx;
1650 int dst_idx = v->fv + idx;
1652 assert (map->map[dst_idx] == -1);
1653 map->map[dst_idx] = src_idx;
1660 destroy_case_map (map);
1664 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1670 /* Maps from SRC to DST, applying case map MAP. */
1672 map_case (const struct case_map *map,
1673 const struct ccase *src, struct ccase *dst)
1677 assert (map != NULL);
1678 assert (src != NULL);
1679 assert (dst != NULL);
1680 assert (src != dst);
1682 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1684 int src_idx = map->map[dst_idx];
1686 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1690 /* Destroys case map MAP. */
1692 destroy_case_map (struct case_map *map)