/* PSPP - computes sample statistics.
   Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
   Written by Ben Pfaff <blp@gnu.org>.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA. */
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/por-file-writer.h>
32 #include <data/procedure.h>
33 #include <data/settings.h>
34 #include <data/storage-stream.h>
35 #include <data/sys-file-writer.h>
36 #include <data/transformations.h>
37 #include <data/value-labels.h>
38 #include <data/variable.h>
39 #include <language/command.h>
40 #include <language/data-io/file-handle.h>
41 #include <language/lexer/lexer.h>
42 #include <libpspp/alloc.h>
43 #include <libpspp/compiler.h>
44 #include <libpspp/hash.h>
45 #include <libpspp/message.h>
46 #include <libpspp/message.h>
47 #include <libpspp/misc.h>
48 #include <libpspp/str.h>
51 #define _(msgid) gettext (msgid)
53 /* Rearranging and reducing a dictionary. */
54 static void start_case_map (struct dictionary *);
55 static struct case_map *finish_case_map (struct dictionary *);
56 static void map_case (const struct case_map *,
57 const struct ccase *, struct ccase *);
58 static void destroy_case_map (struct case_map *);
60 static bool parse_dict_trim (struct dictionary *);
62 /* Reading system and portable files. */
64 /* Type of command. */
71 /* Case reader input program. */
72 struct case_reader_pgm
74 struct any_reader *reader; /* File reader. */
75 struct case_map *map; /* Map from file dict to active file dict. */
76 struct ccase bounce; /* Bounce buffer. */
79 static const struct case_source_class case_reader_source_class;
81 static void case_reader_pgm_free (struct case_reader_pgm *);
83 /* Parses a GET or IMPORT command. */
85 parse_read_command (enum reader_command type)
87 struct case_reader_pgm *pgm = NULL;
88 struct file_handle *fh = NULL;
89 struct dictionary *dict = NULL;
95 if (lex_match_id ("FILE") || token == T_STRING)
99 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
103 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
107 if (lex_match_id ("COMM"))
109 else if (lex_match_id ("TAPE"))
113 lex_error (_("expecting COMM or TAPE"));
123 lex_sbc_missing ("FILE");
127 discard_variables ();
129 pgm = xmalloc (sizeof *pgm);
130 pgm->reader = any_reader_open (fh, &dict);
132 case_nullify (&pgm->bounce);
133 if (pgm->reader == NULL)
136 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
138 start_case_map (dict);
143 if (!parse_dict_trim (dict))
147 pgm->map = finish_case_map (dict);
149 dict_destroy (default_dict);
152 proc_set_source (create_case_source (&case_reader_source_class, pgm));
157 case_reader_pgm_free (pgm);
160 return CMD_CASCADING_FAILURE;
163 /* Frees a struct case_reader_pgm. */
165 case_reader_pgm_free (struct case_reader_pgm *pgm)
169 any_reader_close (pgm->reader);
170 destroy_case_map (pgm->map);
171 case_destroy (&pgm->bounce);
176 /* Clears internal state related to case reader input procedure. */
178 case_reader_source_destroy (struct case_source *source)
180 struct case_reader_pgm *pgm = source->aux;
181 case_reader_pgm_free (pgm);
184 /* Reads all the cases from the data file into C and passes them
185 to WRITE_CASE one by one, passing WC_DATA.
186 Returns true if successful, false if an I/O error occurred. */
188 case_reader_source_read (struct case_source *source,
190 write_case_func *write_case, write_case_data wc_data)
192 struct case_reader_pgm *pgm = source->aux;
198 if (pgm->map == NULL)
199 got_case = any_reader_read (pgm->reader, c);
202 got_case = any_reader_read (pgm->reader, &pgm->bounce);
204 map_case (pgm->map, &pgm->bounce, c);
209 ok = write_case (wc_data);
213 return ok && !any_reader_error (pgm->reader);
216 static const struct case_source_class case_reader_source_class =
220 case_reader_source_read,
221 case_reader_source_destroy,
228 return parse_read_command (GET_CMD);
235 return parse_read_command (IMPORT_CMD);
238 /* Writing system and portable files. */
/* Kind of file that a SAVE-family command writes. */
enum writer_type
  {
    SYSFILE_WRITER,             /* System file. */
    PORFILE_WRITER              /* Portable file. */
  };
/* Whether a SAVE-family command acts as a transformation or as
   a procedure. */
enum command_type
  {
    XFORM_CMD,                  /* Transformation. */
    PROC_CMD                    /* Procedure. */
  };
254 /* File writer plus a case map. */
257 struct any_writer *writer; /* File writer. */
258 struct case_map *map; /* Map to output file dictionary
259 (null pointer for identity mapping). */
260 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
265 case_writer_destroy (struct case_writer *aw)
270 ok = any_writer_close (aw->writer);
271 destroy_case_map (aw->map);
272 case_destroy (&aw->bounce);
278 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
279 WRITER_TYPE identifies the type of file to write,
280 and COMMAND_TYPE identifies the type of command.
282 On success, returns a writer.
283 For procedures only, sets *RETAIN_UNSELECTED to true if cases
284 that would otherwise be excluded by FILTER or USE should be
287 On failure, returns a null pointer. */
288 static struct case_writer *
289 parse_write_command (enum writer_type writer_type,
290 enum command_type command_type,
291 bool *retain_unselected)
294 struct file_handle *handle; /* Output file. */
295 struct dictionary *dict; /* Dictionary for output file. */
296 struct case_writer *aw; /* Writer. */
298 /* Common options. */
299 bool print_map; /* Print map? TODO. */
300 bool print_short_names; /* Print long-to-short name map. TODO. */
301 struct sfm_write_options sysfile_opts;
302 struct pfm_write_options porfile_opts;
304 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
305 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
306 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
308 if (command_type == PROC_CMD)
309 *retain_unselected = true;
312 dict = dict_clone (default_dict);
313 aw = xmalloc (sizeof *aw);
316 case_nullify (&aw->bounce);
318 print_short_names = false;
319 sysfile_opts = sfm_writer_default_options ();
320 porfile_opts = pfm_writer_default_options ();
322 start_case_map (dict);
323 dict_delete_scratch_vars (dict);
328 if (lex_match_id ("OUTFILE"))
332 lex_sbc_only_once ("OUTFILE");
338 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
342 else if (lex_match_id ("NAMES"))
343 print_short_names = true;
344 else if (lex_match_id ("PERMISSIONS"))
349 if (lex_match_id ("READONLY"))
351 else if (lex_match_id ("WRITEABLE"))
355 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
358 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
360 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
363 if (lex_match_id ("RETAIN"))
364 *retain_unselected = true;
365 else if (lex_match_id ("DELETE"))
366 *retain_unselected = false;
369 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
373 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
374 sysfile_opts.compress = true;
375 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
376 sysfile_opts.compress = false;
377 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
380 if (!lex_force_int ())
382 sysfile_opts.version = lex_integer ();
385 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
388 if (lex_match_id ("COMMUNICATIONS"))
389 porfile_opts.type = PFM_COMM;
390 else if (lex_match_id ("TAPE"))
391 porfile_opts.type = PFM_TAPE;
394 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
398 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
401 if (!lex_force_int ())
403 porfile_opts.digits = lex_integer ();
406 else if (!parse_dict_trim (dict))
409 if (!lex_match ('/'))
412 if (lex_end_of_command () != CMD_SUCCESS)
417 lex_sbc_missing ("OUTFILE");
421 dict_compact_values (dict);
422 aw->map = finish_case_map (dict);
424 case_create (&aw->bounce, dict_get_next_value_idx (dict));
426 if (fh_get_referent (handle) == FH_REF_FILE)
431 aw->writer = any_writer_from_sfm_writer (
432 sfm_open_writer (handle, dict, sysfile_opts));
435 aw->writer = any_writer_from_pfm_writer (
436 pfm_open_writer (handle, dict, porfile_opts));
441 aw->writer = any_writer_open (handle, dict);
447 case_writer_destroy (aw);
452 /* Writes case C to writer AW. */
454 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
458 map_case (aw->map, c, &aw->bounce);
461 return any_writer_write (aw->writer, c);
464 /* SAVE and EXPORT. */
466 static bool output_proc (const struct ccase *, void *);
468 /* Parses and performs the SAVE or EXPORT procedure. */
470 parse_output_proc (enum writer_type writer_type)
472 bool retain_unselected;
473 struct variable *saved_filter_variable;
474 struct case_writer *aw;
477 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
479 return CMD_CASCADING_FAILURE;
481 saved_filter_variable = dict_get_filter (default_dict);
482 if (retain_unselected)
483 dict_set_filter (default_dict, NULL);
484 ok = procedure (output_proc, aw);
485 dict_set_filter (default_dict, saved_filter_variable);
487 case_writer_destroy (aw);
488 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Procedure callback for SAVE and EXPORT: writes case C through
   the case writer passed as AW_.  Returns the result of the
   write. */
static bool
output_proc (const struct ccase *c, void *aw_)
{
  return case_writer_write_case (aw_, c);
}
502 return parse_output_proc (SYSFILE_WRITER);
508 return parse_output_proc (PORFILE_WRITER);
511 /* XSAVE and XEXPORT. */
/* State of an XSAVE or XEXPORT transformation. */
struct output_trns
  {
    struct case_writer *aw;     /* Writer that receives each case. */
  };
519 static trns_proc_func output_trns_proc;
520 static trns_free_func output_trns_free;
522 /* Parses the XSAVE or XEXPORT transformation command. */
524 parse_output_trns (enum writer_type writer_type)
526 struct output_trns *t = xmalloc (sizeof *t);
527 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
531 return CMD_CASCADING_FAILURE;
534 add_transformation (output_trns_proc, output_trns_free, t);
538 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
540 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
542 struct output_trns *t = trns_;
543 case_writer_write_case (t->aw, c);
544 return TRNS_CONTINUE;
547 /* Frees an XSAVE or XEXPORT transformation.
548 Returns true if successful, false if an I/O error occurred. */
550 output_trns_free (void *trns_)
552 struct output_trns *t = trns_;
557 ok = case_writer_destroy (t->aw);
567 return parse_output_trns (SYSFILE_WRITER);
570 /* XEXPORT command. */
574 return parse_output_trns (PORFILE_WRITER);
577 static bool rename_variables (struct dictionary *dict);
578 static bool drop_variables (struct dictionary *dict);
579 static bool keep_variables (struct dictionary *dict);
/* Parses one of the dictionary trimming subcommands shared by
   the commands that read and write system files (MAP, DROP,
   KEEP, RENAME) and applies it to DICT.  MAP is accepted but
   not yet implemented.
   Returns true on success, false on failure. */
static bool
parse_dict_trim (struct dictionary *dict)
{
  if (lex_match_id ("MAP"))
    {
      /* FIXME. */
      return true;
    }
  if (lex_match_id ("DROP"))
    return drop_variables (dict);
  if (lex_match_id ("KEEP"))
    return keep_variables (dict);
  if (lex_match_id ("RENAME"))
    return rename_variables (dict);

  lex_error (_("expecting a valid subcommand"));
  return false;
}
606 /* Parses and performs the RENAME subcommand of GET and SAVE. */
608 rename_variables (struct dictionary *dict)
626 v = parse_dict_variable (dict);
629 if (!lex_force_match ('=')
632 if (dict_lookup_var (dict, tokid) != NULL)
634 msg (SE, _("Cannot rename %s as %s because there already exists "
635 "a variable named %s. To rename variables with "
636 "overlapping names, use a single RENAME subcommand "
637 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
638 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
642 dict_rename_var (dict, v, tokid);
651 while (lex_match ('('))
655 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
657 if (!lex_match ('='))
659 msg (SE, _("`=' expected after variable list."));
662 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
666 msg (SE, _("Number of variables on left side of `=' (%d) does not "
667 "match number of variables on right side (%d), in "
668 "parenthesized group %d of RENAME subcommand."),
669 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
672 if (!lex_force_match (')'))
677 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
679 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
685 for (i = 0; i < nn; i++)
693 /* Parses and performs the DROP subcommand of GET and SAVE.
694 Returns true if successful, false on failure.*/
696 drop_variables (struct dictionary *dict)
702 if (!parse_variables (dict, &v, &nv, PV_NONE))
704 dict_delete_vars (dict, v, nv);
707 if (dict_get_var_cnt (dict) == 0)
709 msg (SE, _("Cannot DROP all variables from dictionary."));
715 /* Parses and performs the KEEP subcommand of GET and SAVE.
716 Returns true if successful, false on failure.*/
718 keep_variables (struct dictionary *dict)
725 if (!parse_variables (dict, &v, &nv, PV_NONE))
728 /* Move the specified variables to the beginning. */
729 dict_reorder_vars (dict, v, nv);
731 /* Delete the remaining variables. */
732 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
733 for (i = nv; i < dict_get_var_cnt (dict); i++)
734 v[i - nv] = dict_get_var (dict, i);
735 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* How an input file was named on MATCH FILES. */
enum
  {
    MTF_FILE,                   /* Specified on FILE= subcommand. */
    MTF_TABLE                   /* Specified on TABLE= subcommand. */
  };
750 /* One of the files on MATCH FILES. */
753 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
754 struct mtf_file *next_min; /* Next in the chain of minimums. */
756 int type; /* One of MTF_*. */
757 struct variable **by; /* List of BY variables for this file. */
758 struct file_handle *handle; /* File handle. */
759 struct any_reader *reader; /* File reader. */
760 struct dictionary *dict; /* Dictionary from system file. */
763 char *in_name; /* Variable name. */
764 struct variable *in_var; /* Variable (in master dictionary). */
766 struct ccase input; /* Input record. */
769 /* MATCH FILES procedure. */
772 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
773 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
775 bool ok; /* False if I/O error occurs. */
777 size_t by_cnt; /* Number of variables on BY subcommand. */
779 /* Names of FIRST, LAST variables. */
780 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
782 struct dictionary *dict; /* Dictionary of output file. */
783 struct casefile *output; /* MATCH FILES output. */
784 struct ccase mtf_case; /* Case used for output. */
786 unsigned seq_num; /* Have we initialized this variable? */
787 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
790 static bool mtf_free (struct mtf_proc *);
791 static bool mtf_close_file (struct mtf_file *);
792 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
793 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
795 static bool mtf_read_nonactive_records (void *);
796 static bool mtf_processing_finish (void *);
797 static bool mtf_processing (const struct ccase *, void *);
799 static char *var_type_description (struct variable *);
801 static void set_master (struct variable *, struct variable *master);
802 static struct variable *get_master (struct variable *);
804 /* Parse and execute the MATCH FILES command. */
806 cmd_match_files (void)
809 struct mtf_file *first_table = NULL;
810 struct mtf_file *iter;
812 bool used_active_file = false;
813 bool saw_table = false;
818 mtf.head = mtf.tail = NULL;
822 mtf.dict = dict_create ();
824 case_nullify (&mtf.mtf_case);
827 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
831 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
833 struct mtf_file *file = xmalloc (sizeof *file);
835 if (lex_match_id ("FILE"))
836 file->type = MTF_FILE;
837 else if (lex_match_id ("TABLE"))
839 file->type = MTF_TABLE;
850 file->in_name = NULL;
852 case_nullify (&file->input);
854 /* FILEs go first, then TABLEs. */
855 if (file->type == MTF_TABLE || first_table == NULL)
858 file->prev = mtf.tail;
860 mtf.tail->next = file;
862 if (mtf.head == NULL)
864 if (file->type == MTF_TABLE && first_table == NULL)
869 assert (file->type == MTF_FILE);
870 file->next = first_table;
871 file->prev = first_table->prev;
872 if (first_table->prev)
873 first_table->prev->next = file;
876 first_table->prev = file;
884 if (used_active_file)
886 msg (SE, _("The active file may not be specified more "
890 used_active_file = true;
892 if (!proc_has_source ())
894 msg (SE, _("Cannot specify the active file since no active "
895 "file has been defined."));
899 if (proc_make_temporary_transformations_permanent ())
901 _("MATCH FILES may not be used after TEMPORARY when "
902 "the active file is an input source. "
903 "Temporary transformations will be made permanent."));
905 file->dict = default_dict;
909 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
910 if (file->handle == NULL)
913 file->reader = any_reader_open (file->handle, &file->dict);
914 if (file->reader == NULL)
917 case_create (&file->input, dict_get_next_value_idx (file->dict));
920 while (lex_match ('/'))
921 if (lex_match_id ("RENAME"))
923 if (!rename_variables (file->dict))
926 else if (lex_match_id ("IN"))
935 if (file->in_name != NULL)
937 msg (SE, _("Multiple IN subcommands for a single FILE or "
941 file->in_name = xstrdup (tokid);
946 mtf_merge_dictionary (mtf.dict, file);
951 if (lex_match (T_BY))
953 struct variable **by;
957 msg (SE, _("BY may appear at most once."));
962 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
963 PV_NO_DUPLICATE | PV_NO_SCRATCH))
966 for (iter = mtf.head; iter != NULL; iter = iter->next)
970 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
972 for (i = 0; i < mtf.by_cnt; i++)
974 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
975 if (iter->by[i] == NULL)
977 msg (SE, _("File %s lacks BY variable %s."),
978 iter->handle ? fh_get_name (iter->handle) : "*",
987 else if (lex_match_id ("FIRST"))
989 if (mtf.first[0] != '\0')
991 msg (SE, _("FIRST may appear at most once."));
996 if (!lex_force_id ())
998 strcpy (mtf.first, tokid);
1001 else if (lex_match_id ("LAST"))
1003 if (mtf.last[0] != '\0')
1005 msg (SE, _("LAST may appear at most once."));
1010 if (!lex_force_id ())
1012 strcpy (mtf.last, tokid);
1015 else if (lex_match_id ("MAP"))
1019 else if (lex_match_id ("DROP"))
1021 if (!drop_variables (mtf.dict))
1024 else if (lex_match_id ("KEEP"))
1026 if (!keep_variables (mtf.dict))
1035 if (!lex_match ('/') && token != '.')
1037 lex_end_of_command ();
1042 if (mtf.by_cnt == 0)
1046 msg (SE, _("BY is required when TABLE is specified."));
1051 msg (SE, _("BY is required when IN is specified."));
1056 /* Set up mapping from each file's variables to master
1058 for (iter = mtf.head; iter != NULL; iter = iter->next)
1060 struct dictionary *d = iter->dict;
1063 for (i = 0; i < dict_get_var_cnt (d); i++)
1065 struct variable *v = dict_get_var (d, i);
1066 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1072 /* Add IN variables to master dictionary. */
1073 for (iter = mtf.head; iter != NULL; iter = iter->next)
1074 if (iter->in_name != NULL)
1076 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1077 if (iter->in_var == NULL)
1079 msg (SE, _("IN variable name %s duplicates an "
1080 "existing variable name."),
1081 iter->in_var->name);
1084 iter->in_var->print = iter->in_var->write
1085 = make_output_format (FMT_F, 1, 0);
1088 /* MATCH FILES performs an n-way merge on all its input files.
1091 1. Read one input record from every input FILE.
1093 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1095 3. Find the FILE input record(s) that have minimum BY
1096 values. Store all the values from these input records into
1099 4. For every TABLE, read another record as long as the BY values
1100 on the TABLE's input record are less than the FILEs' BY values.
1101 If an exact match is found, store all the values from the TABLE
1102 input record into the output record.
1104 5. Write the output record.
1106 6. Read another record from each input file FILE and TABLE that
1107 we stored values from above. If we come to the end of one of the
1108 input files, remove it from the list of input files.
1110 7. Repeat from step 2.
1112 Unfortunately, this algorithm can't be implemented in a
1113 straightforward way because there's no function to read a
1114 record from the active file. Instead, it has to be written
1117 FIXME: For merging large numbers of files (more than 10?) a
1118 better algorithm would use a heap for finding minimum
1121 if (!used_active_file)
1122 discard_variables ();
1124 dict_compact_values (mtf.dict);
1125 mtf.output = casefile_create (dict_get_next_value_idx (mtf.dict));
1126 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1127 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1129 if (!mtf_read_nonactive_records (&mtf))
1132 if (used_active_file)
1134 proc_set_sink (create_case_sink (&null_sink_class, default_dict, NULL));
1135 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1138 ok = mtf_processing_finish (&mtf);
1140 discard_variables ();
1142 dict_destroy (default_dict);
1143 default_dict = mtf.dict;
1145 proc_set_source (storage_source_create (mtf.output));
1148 if (!mtf_free (&mtf))
1150 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1154 return CMD_CASCADING_FAILURE;
1157 /* Repeats 2...7 an arbitrary number of times. */
1159 mtf_processing_finish (void *mtf_)
1161 struct mtf_proc *mtf = mtf_;
1162 struct mtf_file *iter;
1164 /* Find the active file and delete it. */
1165 for (iter = mtf->head; iter; iter = iter->next)
1166 if (iter->handle == NULL)
1168 if (!mtf_delete_file_in_place (mtf, &iter))
1173 while (mtf->head && mtf->head->type == MTF_FILE)
1174 if (!mtf_processing (NULL, mtf))
1180 /* Return a string in a static buffer describing V's variable type and
1183 var_type_description (struct variable *v)
1185 static char buf[2][32];
1192 if (v->type == NUMERIC)
1193 strcpy (s, "numeric");
1196 assert (v->type == ALPHA);
1197 sprintf (s, "string with width %d", v->width);
1202 /* Closes FILE and frees its associated data.
1203 Returns true if successful, false if an I/O error
1204 occurred on FILE. */
1206 mtf_close_file (struct mtf_file *file)
1208 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1210 any_reader_close (file->reader);
1211 if (file->handle != NULL)
1212 dict_destroy (file->dict);
1213 case_destroy (&file->input);
1214 free (file->in_name);
1219 /* Free all the data for the MATCH FILES procedure.
1220 Returns true if successful, false if an I/O error
1223 mtf_free (struct mtf_proc *mtf)
1225 struct mtf_file *iter, *next;
1228 for (iter = mtf->head; iter; iter = next)
1231 assert (iter->dict != mtf->dict);
1232 if (!mtf_close_file (iter))
1237 dict_destroy (mtf->dict);
1238 case_destroy (&mtf->mtf_case);
1239 free (mtf->seq_nums);
1244 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1245 file in the chain, or to NULL if was the last in the chain.
1246 Returns true if successful, false if an I/O error occurred. */
1248 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1250 struct mtf_file *f = *file;
1254 f->prev->next = f->next;
1256 f->next->prev = f->prev;
1258 mtf->head = f->next;
1260 mtf->tail = f->prev;
1263 if (f->in_var != NULL)
1264 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1265 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1267 struct variable *v = dict_get_var (f->dict, i);
1268 struct variable *mv = get_master (v);
1271 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1273 if (v->type == NUMERIC)
1276 memset (out->s, ' ', v->width);
1280 return mtf_close_file (f);
1283 /* Read a record from every input file except the active file.
1284 Returns true if successful, false if an I/O error occurred. */
1286 mtf_read_nonactive_records (void *mtf_)
1288 struct mtf_proc *mtf = mtf_;
1289 struct mtf_file *iter, *next;
1292 for (iter = mtf->head; ok && iter != NULL; iter = next)
1295 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1296 if (!mtf_delete_file_in_place (mtf, &iter))
1302 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1303 if A == B, 1 if A > B. */
1305 mtf_compare_BY_values (struct mtf_proc *mtf,
1306 struct mtf_file *a, struct mtf_file *b,
1307 const struct ccase *c)
1309 const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1310 const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1311 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1312 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1315 /* Perform one iteration of steps 3...7 above.
1316 Returns true if successful, false if an I/O error occurred. */
1318 mtf_processing (const struct ccase *c, void *mtf_)
1320 struct mtf_proc *mtf = mtf_;
1322 /* Do we need another record from the active file? */
1323 bool read_active_file;
1325 assert (mtf->head != NULL);
1326 if (mtf->head->type == MTF_TABLE)
1331 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1332 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1333 struct mtf_file *iter, *next;
1335 read_active_file = false;
1337 /* 3. Find the FILE input record(s) that have minimum BY
1338 values. Store all the values from these input records into
1339 the output record. */
1340 min_head = min_tail = mtf->head;
1341 max_head = max_tail = NULL;
1342 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1345 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1349 max_tail = max_tail->next_min = iter;
1351 max_head = max_tail = iter;
1354 min_tail = min_tail->next_min = iter;
1359 max_tail->next_min = min_head;
1360 max_tail = min_tail;
1364 max_head = min_head;
1365 max_tail = min_tail;
1367 min_head = min_tail = iter;
1371 /* 4. For every TABLE, read another record as long as the BY
1372 values on the TABLE's input record are less than the FILEs'
1373 BY values. If an exact match is found, store all the values
1374 from the TABLE input record into the output record. */
1375 for (; iter != NULL; iter = next)
1377 assert (iter->type == MTF_TABLE);
1382 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1386 max_tail = max_tail->next_min = iter;
1388 max_head = max_tail = iter;
1391 min_tail = min_tail->next_min = iter;
1394 if (iter->handle == NULL)
1396 if (any_reader_read (iter->reader, &iter->input))
1398 if (!mtf_delete_file_in_place (mtf, &iter))
1405 /* Next sequence number. */
1408 /* Store data to all the records we are using. */
1410 min_tail->next_min = NULL;
1411 for (iter = min_head; iter; iter = iter->next_min)
1415 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1417 struct variable *v = dict_get_var (iter->dict, i);
1418 struct variable *mv = get_master (v);
1420 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1422 const struct ccase *record
1423 = case_is_null (&iter->input) ? c : &iter->input;
1424 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1426 mtf->seq_nums[mv->index] = mtf->seq_num;
1427 if (v->type == NUMERIC)
1428 out->f = case_num (record, v->fv);
1430 memcpy (out->s, case_str (record, v->fv), v->width);
1433 if (iter->in_var != NULL)
1434 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1436 if (iter->type == MTF_FILE && iter->handle == NULL)
1437 read_active_file = true;
1440 /* Store missing values to all the records we're not
1443 max_tail->next_min = NULL;
1444 for (iter = max_head; iter; iter = iter->next_min)
1448 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1450 struct variable *v = dict_get_var (iter->dict, i);
1451 struct variable *mv = get_master (v);
1453 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1455 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1456 mtf->seq_nums[mv->index] = mtf->seq_num;
1458 if (v->type == NUMERIC)
1461 memset (out->s, ' ', v->width);
1464 if (iter->in_var != NULL)
1465 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1468 /* 5. Write the output record. */
1469 casefile_append (mtf->output, &mtf->mtf_case);
1471 /* 6. Read another record from each input file FILE and TABLE
1472 that we stored values from above. If we come to the end of
1473 one of the input files, remove it from the list of input
1475 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1477 next = iter->next_min;
1478 if (iter->reader != NULL
1479 && !any_reader_read (iter->reader, &iter->input))
1480 if (!mtf_delete_file_in_place (mtf, &iter))
1484 while (!read_active_file
1485 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1490 /* Merge the dictionary for file F into master dictionary M. */
1492 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1494 struct dictionary *d = f->dict;
1495 const char *d_docs, *m_docs;
1498 if (dict_get_label (m) == NULL)
1499 dict_set_label (m, dict_get_label (d));
1501 d_docs = dict_get_documents (d);
1502 m_docs = dict_get_documents (m);
1506 dict_set_documents (m, d_docs);
1512 new_len = strlen (m_docs) + strlen (d_docs);
1513 new_docs = xmalloc (new_len + 1);
1514 strcpy (new_docs, m_docs);
1515 strcat (new_docs, d_docs);
1516 dict_set_documents (m, new_docs);
1521 for (i = 0; i < dict_get_var_cnt (d); i++)
1523 struct variable *dv = dict_get_var (d, i);
1524 struct variable *mv = dict_lookup_var (m, dv->name);
1526 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1531 if (mv->width != dv->width)
1533 msg (SE, _("Variable %s in file %s (%s) has different "
1534 "type or width from the same variable in "
1535 "earlier file (%s)."),
1536 dv->name, fh_get_name (f->handle),
1537 var_type_description (dv), var_type_description (mv));
1541 if (dv->width == mv->width)
1543 if (val_labs_count (dv->val_labs)
1544 && !val_labs_count (mv->val_labs))
1546 val_labs_destroy (mv->val_labs);
1547 mv->val_labs = val_labs_copy (dv->val_labs);
1549 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1550 mv_copy (&mv->miss, &dv->miss);
1553 if (dv->label && !mv->label)
1554 mv->label = xstrdup (dv->label);
1557 mv = dict_clone_var_assert (m, dv, dv->name);
/* Records MASTER as the master variable of V by attaching it as
   V's auxiliary data, with no destructor. */
static void
set_master (struct variable *v, struct variable *master)
{
  var_attach_aux (v, master, NULL);
}
1570 /* Returns the master variable corresponding to V,
1571 as set with set_master(). */
1572 static struct variable *
1573 get_master (struct variable *v)
/* Case map.

   A case map copies data from a case that corresponds to one
   dictionary to a case that corresponds to a second dictionary
   derived from the first by, optionally, deleting, reordering,
   or renaming variables.  (No new variables may be created.) */
struct case_map
  {
    size_t value_cnt;   /* Number of values in map. */
    int *map;           /* For each destination index, the
                           corresponding source index, or -1 if
                           nothing is copied to that index. */
  };
1596 /* Prepares dictionary D for producing a case map. Afterward,
1597 the caller may delete, reorder, or rename variables within D
1598 at will before using finish_case_map() to produce the case
1601 Uses D's aux members, which must otherwise not be in use. */
1603 start_case_map (struct dictionary *d)
1605 size_t var_cnt = dict_get_var_cnt (d);
1608 for (i = 0; i < var_cnt; i++)
1610 struct variable *v = dict_get_var (d, i);
1611 int *src_fv = xmalloc (sizeof *src_fv);
1613 var_attach_aux (v, src_fv, var_dtor_free);
1617 /* Produces a case map from dictionary D, which must have been
1618 previously prepared with start_case_map().
1620 Does not retain any reference to D, and clears the aux members
1621 set up by start_case_map().
1623 Returns the new case map, or a null pointer if no mapping is
1624 required (that is, no data has changed position). */
1625 static struct case_map *
1626 finish_case_map (struct dictionary *d)
1628 struct case_map *map;
1629 size_t var_cnt = dict_get_var_cnt (d);
1633 map = xmalloc (sizeof *map);
1634 map->value_cnt = dict_get_next_value_idx (d);
1635 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1636 for (i = 0; i < map->value_cnt; i++)
1640 for (i = 0; i < var_cnt; i++)
1642 struct variable *v = dict_get_var (d, i);
1643 int *src_fv = (int *) var_detach_aux (v);
1646 if (v->fv != *src_fv)
1649 for (idx = 0; idx < v->nv; idx++)
1651 int src_idx = *src_fv + idx;
1652 int dst_idx = v->fv + idx;
1654 assert (map->map[dst_idx] == -1);
1655 map->map[dst_idx] = src_idx;
1662 destroy_case_map (map);
1666 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1672 /* Maps from SRC to DST, applying case map MAP. */
1674 map_case (const struct case_map *map,
1675 const struct ccase *src, struct ccase *dst)
1679 assert (map != NULL);
1680 assert (src != NULL);
1681 assert (dst != NULL);
1682 assert (src != dst);
1684 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1686 int src_idx = map->map[dst_idx];
1688 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1692 /* Destroys case map MAP. */
1694 destroy_case_map (struct case_map *map)