1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 02110-1301, USA. */
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/por-file-writer.h>
32 #include <data/procedure.h>
33 #include <data/settings.h>
34 #include <data/storage-stream.h>
35 #include <data/sys-file-writer.h>
36 #include <data/transformations.h>
37 #include <data/value-labels.h>
38 #include <data/variable.h>
39 #include <language/command.h>
40 #include <language/data-io/file-handle.h>
41 #include <language/lexer/lexer.h>
42 #include <libpspp/alloc.h>
43 #include <libpspp/compiler.h>
44 #include <libpspp/hash.h>
45 #include <libpspp/message.h>
46 #include <libpspp/message.h>
47 #include <libpspp/misc.h>
48 #include <libpspp/str.h>
51 #define _(msgid) gettext (msgid)
53 /* Rearranging and reducing a dictionary. */
54 static void start_case_map (struct dictionary *);
55 static struct case_map *finish_case_map (struct dictionary *);
56 static void map_case (const struct case_map *,
57 const struct ccase *, struct ccase *);
58 static void destroy_case_map (struct case_map *);
60 static bool parse_dict_trim (struct dictionary *);
62 /* Reading system and portable files. */
64 /* Type of command. */
71 /* Case reader input program. */
72 struct case_reader_pgm
74 struct any_reader *reader; /* File reader. */
75 struct case_map *map; /* Map from file dict to active file dict. */
76 struct ccase bounce; /* Bounce buffer. */
79 static const struct case_source_class case_reader_source_class;
81 static void case_reader_pgm_free (struct case_reader_pgm *);
83 /* Parses a GET or IMPORT command; TYPE tells the two apart.
   Opens the file named on FILE, applies any dictionary-trimming
   subcommands to the file's dictionary, and installs the reader
   as the new case source.  On failure, frees PGM and returns
   CMD_CASCADING_FAILURE. */
85 parse_read_command (enum reader_command type)
87 struct case_reader_pgm *pgm = NULL;
88 struct file_handle *fh = NULL;
89 struct dictionary *dict = NULL;
/* The FILE keyword may be given, or the current token may already
   be a string. */
95 if (lex_match_id ("FILE") || token == T_STRING)
99 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
/* TYPE is accepted on IMPORT only; its value must be COMM or TAPE. */
103 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
107 if (lex_match_id ("COMM"))
109 else if (lex_match_id ("TAPE"))
113 lex_error (_("expecting COMM or TAPE"));
/* Error report for a missing FILE subcommand. */
123 lex_sbc_missing ("FILE");
127 discard_variables ();
129 pgm = xmalloc (sizeof *pgm);
130 pgm->reader = any_reader_open (fh, &dict);
/* Nullify the bounce case before testing the open result, so that
   case_reader_pgm_free() can destroy it on the failure path too. */
132 case_nullify (&pgm->bounce);
133 if (pgm->reader == NULL)
136 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
/* The trimming subcommands are bracketed by start_case_map() and
   finish_case_map(), which yields the map from the file's layout
   to the trimmed dictionary's layout. */
138 start_case_map (dict);
143 if (!parse_dict_trim (dict))
147 pgm->map = finish_case_map (dict);
149 dict_destroy (default_dict);
/* Hand the reader program over as the case source. */
152 proc_set_source (create_case_source (&case_reader_source_class, pgm));
/* Failure path. */
157 case_reader_pgm_free (pgm);
160 return CMD_CASCADING_FAILURE;
163 /* Frees a struct case_reader_pgm: closes its file reader,
   destroys its case map, and destroys its bounce case. */
165 case_reader_pgm_free (struct case_reader_pgm *pgm)
169 any_reader_close (pgm->reader);
170 destroy_case_map (pgm->map);
171 case_destroy (&pgm->bounce);
176 /* Clears internal state related to case reader input procedure. */
178 case_reader_source_destroy (struct case_source *source)
180 struct case_reader_pgm *pgm = source->aux;
181 case_reader_pgm_free (pgm);
184 /* Reads all the cases from the data file into C and passes them
185 to WRITE_CASE one by one, passing WC_DATA.
186 Returns true if successful, false if an I/O error occurred. */
188 case_reader_source_read (struct case_source *source,
190 write_case_func *write_case, write_case_data wc_data)
192 struct case_reader_pgm *pgm = source->aux;
/* Without a case map the reader fills C directly. */
198 if (pgm->map == NULL)
199 got_case = any_reader_read (pgm->reader, c);
/* With a case map the case is read into the bounce buffer and
   then mapped into C. */
202 got_case = any_reader_read (pgm->reader, &pgm->bounce);
204 map_case (pgm->map, &pgm->bounce, c);
209 ok = write_case (wc_data);
/* Success requires both a successful write_case and a reader that
   reports no error. */
213 return ok && !any_reader_error (pgm->reader);
216 static const struct case_source_class case_reader_source_class =
220 case_reader_source_read,
221 case_reader_source_destroy,
228 return parse_read_command (GET_CMD);
235 return parse_read_command (IMPORT_CMD);
238 /* Writing system and portable files. */
240 /* Type of output file. */
243 SYSFILE_WRITER, /* System file. */
244 PORFILE_WRITER /* Portable file. */
247 /* Type of a command. */
250 XFORM_CMD, /* Transformation. */
251 PROC_CMD /* Procedure. */
254 /* File writer plus a case map. */
257 struct any_writer *writer; /* File writer. */
258 struct case_map *map; /* Map to output file dictionary
259 (null pointer for identity mapping). */
260 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
/* Destroys case writer AW: closes the underlying file writer
   (recording the result in OK), then destroys the case map and
   the bounce case. */
265 case_writer_destroy (struct case_writer *aw)
270 ok = any_writer_close (aw->writer);
271 destroy_case_map (aw->map);
272 case_destroy (&aw->bounce);
278 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
279 WRITER_TYPE identifies the type of file to write,
280 and COMMAND_TYPE identifies the type of command.
282 On success, returns a writer.
283 For procedures only, sets *RETAIN_UNSELECTED to true if cases
284 that would otherwise be excluded by FILTER or USE should be
285 kept (true unless UNSELECTED=DELETE is given).
287 On failure, returns a null pointer. */
288 static struct case_writer *
289 parse_write_command (enum writer_type writer_type,
290 enum command_type command_type,
291 bool *retain_unselected)
294 struct file_handle *handle; /* Output file. */
295 struct dictionary *dict; /* Dictionary for output file. */
296 struct case_writer *aw; /* Writer. */
298 /* Common options. */
299 bool print_map; /* Print map? TODO. */
300 bool print_short_names; /* Print long-to-short name map. TODO. */
301 struct sfm_write_options sysfile_opts;
302 struct pfm_write_options porfile_opts;
304 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
305 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
/* RETAIN_UNSELECTED must be supplied exactly when parsing a
   procedure. */
306 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
308 if (command_type == PROC_CMD)
309 *retain_unselected = true;
/* DICT is a clone of the default dictionary; the subcommands below
   edit the clone. */
312 dict = dict_clone (default_dict);
313 aw = xmalloc (sizeof *aw);
/* Nullified up front so that case_writer_destroy() always has a
   bounce case it can destroy. */
316 case_nullify (&aw->bounce);
318 print_short_names = false;
319 sysfile_opts = sfm_writer_default_options ();
320 porfile_opts = pfm_writer_default_options ();
/* Scratch variables are deleted after start_case_map(), so their
   removal is reflected in the case map produced later. */
322 start_case_map (dict);
323 dict_delete_scratch_vars (dict);
328 if (lex_match_id ("OUTFILE"))
332 lex_sbc_only_once ("OUTFILE");
338 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
342 else if (lex_match_id ("NAMES"))
343 print_short_names = true;
344 else if (lex_match_id ("PERMISSIONS"))
349 if (lex_match_id ("READONLY"))
351 else if (lex_match_id ("WRITEABLE"))
355 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
/* The same permission setting is stored in both option sets. */
358 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
/* UNSELECTED is recognized for procedures only. */
360 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
363 if (lex_match_id ("RETAIN"))
364 *retain_unselected = true;
365 else if (lex_match_id ("DELETE"))
366 *retain_unselected = false;
369 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
/* Subcommands recognized for system files only. */
373 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
374 sysfile_opts.compress = true;
375 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
376 sysfile_opts.compress = false;
377 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
380 if (!lex_force_int ())
382 sysfile_opts.version = lex_integer ();
/* Subcommands recognized for portable files only. */
385 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
388 if (lex_match_id ("COMMUNICATIONS"))
389 porfile_opts.type = PFM_COMM;
390 else if (lex_match_id ("TAPE"))
391 porfile_opts.type = PFM_TAPE;
394 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
398 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
401 if (!lex_force_int ())
403 porfile_opts.digits = lex_integer ();
/* Anything else is handed to the dictionary-trimming parser. */
406 else if (!parse_dict_trim (dict))
409 if (!lex_match ('/'))
412 if (lex_end_of_command () != CMD_SUCCESS)
/* Error report for a missing OUTFILE subcommand. */
417 lex_sbc_missing ("OUTFILE");
/* Compact the output dictionary, derive the case map from it, and
   size the bounce case for the compacted dictionary. */
421 dict_compact_values (dict);
422 aw->map = finish_case_map (dict);
424 case_create (&aw->bounce, dict_get_next_value_idx (dict));
/* Handles that refer to ordinary files get a system or portable
   file writer; the remaining call below uses any_writer_open(). */
426 if (fh_get_referent (handle) == FH_REF_FILE)
431 aw->writer = any_writer_from_sfm_writer (
432 sfm_open_writer (handle, dict, sysfile_opts));
435 aw->writer = any_writer_from_pfm_writer (
436 pfm_open_writer (handle, dict, porfile_opts));
441 aw->writer = any_writer_open (handle, dict);
/* Failure path: the partially built writer is destroyed. */
447 case_writer_destroy (aw);
452 /* Writes case C to writer AW, returning the result of
   any_writer_write(). */
454 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
/* map_case() asserts a non-null map, so this call is presumably
   guarded by a test of aw->map -- confirm. */
458 map_case (aw->map, c, &aw->bounce);
461 return any_writer_write (aw->writer, c);
464 /* SAVE and EXPORT. */
466 static bool output_proc (const struct ccase *, void *);
468 /* Parses and performs the SAVE or EXPORT procedure.
   WRITER_TYPE selects the kind of file written.  Returns
   CMD_SUCCESS, or CMD_CASCADING_FAILURE if parsing or the
   procedure fails. */
470 parse_output_proc (enum writer_type writer_type)
472 bool retain_unselected;
473 struct variable *saved_filter_variable;
474 struct case_writer *aw;
477 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
479 return CMD_CASCADING_FAILURE;
/* When unselected cases are to be retained, the filter variable is
   cleared for the duration of the procedure and restored
   afterward. */
481 saved_filter_variable = dict_get_filter (default_dict);
482 if (retain_unselected)
483 dict_set_filter (default_dict, NULL);
484 ok = procedure (output_proc, aw);
485 dict_set_filter (default_dict, saved_filter_variable);
487 case_writer_destroy (aw);
488 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
491 /* Writes case C to file. */
493 output_proc (const struct ccase *c, void *aw_)
495 struct case_writer *aw = aw_;
496 return case_writer_write_case (aw, c);
502 return parse_output_proc (SYSFILE_WRITER);
508 return parse_output_proc (PORFILE_WRITER);
511 /* XSAVE and XEXPORT. */
513 /* Transformation. */
516 struct case_writer *aw; /* Writer. */
519 static trns_proc_func output_trns_proc;
520 static trns_free_func output_trns_free;
522 /* Parses the XSAVE or XEXPORT transformation command and, on
   success, registers a transformation that owns the writer. */
524 parse_output_trns (enum writer_type writer_type)
526 struct output_trns *t = xmalloc (sizeof *t);
/* A null RETAIN_UNSELECTED is required for XFORM_CMD:
   parse_write_command() asserts it. */
527 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
531 return CMD_CASCADING_FAILURE;
/* output_trns_free() is registered as the destructor for T. */
534 add_transformation (output_trns_proc, output_trns_free, t);
538 /* Writes case C to the system file specified on XSAVE or XEXPORT.
   Always returns TRNS_CONTINUE. */
540 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
542 struct output_trns *t = trns_;
/* NOTE(review): the bool result of case_writer_write_case() is
   discarded, so a write failure is not reported from here --
   confirm that this is intended. */
543 case_writer_write_case (t->aw, c);
544 return TRNS_CONTINUE;
547 /* Frees an XSAVE or XEXPORT transformation.
548 Returns true if successful, false if an I/O error occurred. */
550 output_trns_free (void *trns_)
552 struct output_trns *t = trns_;
557 ok = case_writer_destroy (t->aw);
567 return parse_output_trns (SYSFILE_WRITER);
570 /* XEXPORT command. */
574 return parse_output_trns (PORFILE_WRITER);
577 static bool rename_variables (struct dictionary *dict);
578 static bool drop_variables (struct dictionary *dict);
579 static bool keep_variables (struct dictionary *dict);
581 /* Commands that read and write system files share a great deal
582 of common syntactic structure for rearranging and dropping
583 variables. This function parses this syntax and modifies DICT
584 appropriately. Returns true on success, false on failure.
   Recognizes the MAP, DROP, KEEP and RENAME subcommands. */
586 parse_dict_trim (struct dictionary *dict)
588 if (lex_match_id ("MAP"))
/* DROP, KEEP and RENAME each delegate to a helper and return its
   result. */
593 else if (lex_match_id ("DROP"))
594 return drop_variables (dict);
595 else if (lex_match_id ("KEEP"))
596 return keep_variables (dict);
597 else if (lex_match_id ("RENAME"))
598 return rename_variables (dict);
/* No recognized subcommand keyword. */
601 lex_error (_("expecting a valid subcommand"));
606 /* Parses and performs the RENAME subcommand of GET and SAVE.
   Two forms appear below: a single OLD=NEW rename, and one or
   more parenthesized groups of old names, `=', and new names. */
608 rename_variables (struct dictionary *dict)
/* Single-variable form. */
626 v = parse_dict_variable (dict);
629 if (!lex_force_match ('=')
/* The new name must not already be in use in DICT. */
632 if (dict_lookup_var (dict, tokid) != NULL)
634 msg (SE, _("Cannot rename %s as %s because there already exists "
635 "a variable named %s. To rename variables with "
636 "overlapping names, use a single RENAME subcommand "
637 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
638 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
642 dict_rename_var (dict, v, tokid);
/* Parenthesized-group form.  PV_APPEND accumulates the variables
   and names of every group into V and NEW_NAMES. */
651 while (lex_match ('('))
655 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
657 if (!lex_match ('='))
659 msg (SE, _("`=' expected after variable list."));
662 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* NOTE(review): the arguments below are cast to unsigned but the
   format uses %d -- confirm which is intended. */
666 msg (SE, _("Number of variables on left side of `=' (%d) does not "
667 "match number of variables on right side (%d), in "
668 "parenthesized group %d of RENAME subcommand."),
669 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
672 if (!lex_force_match (')'))
/* All accumulated renames are applied in a single call. */
677 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
679 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
685 for (i = 0; i < nn; i++)
693 /* Parses and performs the DROP subcommand of GET and SAVE.
694 Returns true if successful, false on failure.
   Dropping every variable in DICT is reported as an error. */
696 drop_variables (struct dictionary *dict)
702 if (!parse_variables (dict, &v, &nv, PV_NONE))
704 dict_delete_vars (dict, v, nv);
/* The check is made after the deletion, so DICT is already empty
   when the error is reported. */
707 if (dict_get_var_cnt (dict) == 0)
709 msg (SE, _("Cannot DROP all variables from dictionary."));
715 /* Parses and performs the KEEP subcommand of GET and SAVE.
716 Returns true if successful, false on failure.
   The kept variables end up first in DICT, in the order given. */
718 keep_variables (struct dictionary *dict)
725 if (!parse_variables (dict, &v, &nv, PV_NONE))
728 /* Move the specified variables to the beginning. */
729 dict_reorder_vars (dict, v, nv);
731 /* Delete the remaining variables.  V is resized and reused to
   hold every variable from index NV onward. */
732 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
733 for (i = nv; i < dict_get_var_cnt (dict); i++)
734 v[i - nv] = dict_get_var (dict, i);
735 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
746 MTF_FILE, /* Specified on FILE= subcommand. */
747 MTF_TABLE /* Specified on TABLE= subcommand. */
750 /* One of the files on MATCH FILES. */
753 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
754 struct mtf_file *next_min; /* Next in the chain of minimums. */
756 int type; /* One of MTF_*. */
757 struct variable **by; /* List of BY variables for this file. */
758 struct file_handle *handle; /* File handle. */
759 struct any_reader *reader; /* File reader. */
760 struct dictionary *dict; /* Dictionary from system file. */
763 char *in_name; /* Variable name. */
764 struct variable *in_var; /* Variable (in master dictionary). */
766 struct ccase input; /* Input record. */
769 /* MATCH FILES procedure. */
772 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
773 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
775 bool ok; /* False if I/O error occurs. */
777 size_t by_cnt; /* Number of variables on BY subcommand. */
779 /* Names of FIRST, LAST variables. */
780 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
782 struct dictionary *dict; /* Dictionary of output file. */
783 struct casefile *output; /* MATCH FILES output. */
784 struct ccase mtf_case; /* Case used for output. */
786 unsigned seq_num; /* Have we initialized this variable? */
787 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
790 static bool mtf_free (struct mtf_proc *);
791 static bool mtf_close_file (struct mtf_file *);
792 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
793 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
795 static bool mtf_read_nonactive_records (void *);
796 static bool mtf_processing_finish (void *);
797 static bool mtf_processing (const struct ccase *, void *);
799 static char *var_type_description (struct variable *);
801 static void set_master (struct variable *, struct variable *master);
802 static struct variable *get_master (struct variable *);
804 /* Parse and execute the MATCH FILES command. */
806 cmd_match_files (void)
809 struct mtf_file *first_table = NULL;
810 struct mtf_file *iter;
812 bool used_active_file = false;
813 bool saw_table = false;
818 mtf.head = mtf.tail = NULL;
822 mtf.dict = dict_create ();
824 case_nullify (&mtf.mtf_case);
827 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
831 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
833 struct mtf_file *file = xmalloc (sizeof *file);
835 if (lex_match_id ("FILE"))
836 file->type = MTF_FILE;
837 else if (lex_match_id ("TABLE"))
839 file->type = MTF_TABLE;
850 file->in_name = NULL;
852 case_nullify (&file->input);
854 /* FILEs go first, then TABLEs. */
855 if (file->type == MTF_TABLE || first_table == NULL)
858 file->prev = mtf.tail;
860 mtf.tail->next = file;
862 if (mtf.head == NULL)
864 if (file->type == MTF_TABLE && first_table == NULL)
869 assert (file->type == MTF_FILE);
870 file->next = first_table;
871 file->prev = first_table->prev;
872 if (first_table->prev)
873 first_table->prev->next = file;
876 first_table->prev = file;
884 if (used_active_file)
886 msg (SE, _("The active file may not be specified more "
890 used_active_file = true;
892 if (!proc_has_source ())
894 msg (SE, _("Cannot specify the active file since no active "
895 "file has been defined."));
899 if (proc_make_temporary_transformations_permanent ())
901 _("MATCH FILES may not be used after TEMPORARY when "
902 "the active file is an input source. "
903 "Temporary transformations will be made permanent."));
905 file->dict = default_dict;
909 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
910 if (file->handle == NULL)
913 file->reader = any_reader_open (file->handle, &file->dict);
914 if (file->reader == NULL)
917 case_create (&file->input, dict_get_next_value_idx (file->dict));
920 while (lex_match ('/'))
921 if (lex_match_id ("RENAME"))
923 if (!rename_variables (file->dict))
926 else if (lex_match_id ("IN"))
935 if (file->in_name != NULL)
937 msg (SE, _("Multiple IN subcommands for a single FILE or "
941 file->in_name = xstrdup (tokid);
946 mtf_merge_dictionary (mtf.dict, file);
951 if (lex_match (T_BY))
953 struct variable **by;
957 msg (SE, _("BY may appear at most once."));
962 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
963 PV_NO_DUPLICATE | PV_NO_SCRATCH))
966 for (iter = mtf.head; iter != NULL; iter = iter->next)
970 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
972 for (i = 0; i < mtf.by_cnt; i++)
974 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
975 if (iter->by[i] == NULL)
977 msg (SE, _("File %s lacks BY variable %s."),
978 iter->handle ? fh_get_name (iter->handle) : "*",
987 else if (lex_match_id ("FIRST"))
989 if (mtf.first[0] != '\0')
991 msg (SE, _("FIRST may appear at most once."));
996 if (!lex_force_id ())
998 strcpy (mtf.first, tokid);
1001 else if (lex_match_id ("LAST"))
1003 if (mtf.last[0] != '\0')
1005 msg (SE, _("LAST may appear at most once."));
1010 if (!lex_force_id ())
1012 strcpy (mtf.last, tokid);
1015 else if (lex_match_id ("MAP"))
1019 else if (lex_match_id ("DROP"))
1021 if (!drop_variables (mtf.dict))
1024 else if (lex_match_id ("KEEP"))
1026 if (!keep_variables (mtf.dict))
1035 if (!lex_match ('/') && token != '.')
1037 lex_end_of_command ();
1042 if (mtf.by_cnt == 0)
1046 msg (SE, _("BY is required when TABLE is specified."));
1051 msg (SE, _("BY is required when IN is specified."));
1056 /* Set up mapping from each file's variables to master
1058 for (iter = mtf.head; iter != NULL; iter = iter->next)
1060 struct dictionary *d = iter->dict;
1063 for (i = 0; i < dict_get_var_cnt (d); i++)
1065 struct variable *v = dict_get_var (d, i);
1066 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1072 /* Add IN variables to master dictionary. */
1073 for (iter = mtf.head; iter != NULL; iter = iter->next)
1074 if (iter->in_name != NULL)
1076 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1077 if (iter->in_var == NULL)
1079 msg (SE, _("IN variable name %s duplicates an "
1080 "existing variable name."),
1081 iter->in_var->name);
1084 iter->in_var->print = iter->in_var->write
1085 = make_output_format (FMT_F, 1, 0);
1088 /* MATCH FILES performs an n-way merge on all its input files.
1091 1. Read one input record from every input FILE.
1093 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1095 3. Find the FILE input record(s) that have minimum BY
1096 values. Store all the values from these input records into
1099 4. For every TABLE, read another record as long as the BY values
1100 on the TABLE's input record are less than the FILEs' BY values.
1101 If an exact match is found, store all the values from the TABLE
1102 input record into the output record.
1104 5. Write the output record.
1106 6. Read another record from each input file FILE and TABLE that
1107 we stored values from above. If we come to the end of one of the
1108 input files, remove it from the list of input files.
1110 7. Repeat from step 2.
1112 Unfortunately, this algorithm can't be implemented in a
1113 straightforward way because there's no function to read a
1114 record from the active file. Instead, it has to be written
1117 FIXME: For merging large numbers of files (more than 10?) a
1118 better algorithm would use a heap for finding minimum
1121 if (!used_active_file)
1122 discard_variables ();
1124 dict_compact_values (mtf.dict);
1125 mtf.output = casefile_create (dict_get_next_value_idx (mtf.dict));
1126 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1127 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1129 if (!mtf_read_nonactive_records (&mtf))
1132 if (used_active_file)
1134 proc_set_sink (create_case_sink (&null_sink_class, default_dict, NULL));
1135 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1138 ok = mtf_processing_finish (&mtf);
1140 discard_variables ();
1142 default_dict = mtf.dict;
1144 proc_set_source (storage_source_create (mtf.output));
1147 if (!mtf_free (&mtf))
1149 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1153 return CMD_CASCADING_FAILURE;
1156 /* Repeats 2...7 an arbitrary number of times.
   MTF_ is the struct mtf_proc for the command. */
1158 mtf_processing_finish (void *mtf_)
1160 struct mtf_proc *mtf = mtf_;
1161 struct mtf_file *iter;
1163 /* Find the active file and delete it.  The active file is the
   entry whose handle is null. */
1164 for (iter = mtf->head; iter; iter = iter->next)
1165 if (iter->handle == NULL)
1167 if (!mtf_delete_file_in_place (mtf, &iter))
/* Keep merging while a FILE remains at the head of the list; a
   null case is passed to mtf_processing(). */
1172 while (mtf->head && mtf->head->type == MTF_FILE)
1173 if (!mtf_processing (NULL, mtf))
1179 /* Return a string in a static buffer describing V's variable type and
1180 width.  The result is "numeric" for a NUMERIC variable and
   "string with width N" for an ALPHA one.  It points into
   function-static storage, so the caller must not free it. */
1182 var_type_description (struct variable *v)
/* Two buffers are declared; mtf_merge_dictionary() uses two
   results within a single message. */
1184 static char buf[2][32];
1191 if (v->type == NUMERIC)
1192 strcpy (s, "numeric");
1195 assert (v->type == ALPHA);
1196 sprintf (s, "string with width %d", v->width);
1201 /* Closes FILE and frees its associated data.
1202 Returns true if successful, false if an I/O error
1203 occurred on FILE. */
1205 mtf_close_file (struct mtf_file *file)
/* The error state is sampled before the reader is closed; a file
   without a reader counts as successful. */
1207 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1209 any_reader_close (file->reader);
/* Only a file with a handle has its dictionary destroyed here;
   cmd_match_files() gives the active file the default
   dictionary. */
1210 if (file->handle != NULL)
1211 dict_destroy (file->dict);
1212 case_destroy (&file->input);
1213 free (file->in_name);
1218 /* Free all the data for the MATCH FILES procedure.
1219 Returns true if successful, false if an I/O error
1222 mtf_free (struct mtf_proc *mtf)
1224 struct mtf_file *iter, *next;
1227 for (iter = mtf->head; iter; iter = next)
1230 assert (iter->dict != mtf->dict);
1231 if (!mtf_close_file (iter))
1236 dict_destroy (mtf->dict);
1237 case_destroy (&mtf->mtf_case);
1238 free (mtf->seq_nums);
1243 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1244 file in the chain, or to NULL if was the last in the chain.
1245 Returns true if successful, false if an I/O error occurred. */
1247 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1249 struct mtf_file *f = *file;
/* Unlink F from the doubly linked list, updating the list's head
   and tail where F was first or last. */
1253 f->prev->next = f->next;
1255 f->next->prev = f->prev;
1257 mtf->head = f->next;
1259 mtf->tail = f->prev;
/* F no longer contributes to output cases: clear its IN variable
   and reset the master variables that F's variables map to. */
1262 if (f->in_var != NULL)
1263 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1264 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1266 struct variable *v = dict_get_var (f->dict, i);
1267 struct variable *mv = get_master (v);
1270 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1272 if (v->type == NUMERIC)
/* String values are blanked with spaces. */
1275 memset (out->s, ' ', v->width);
/* The result of closing F is the result of this function. */
1279 return mtf_close_file (f);
1282 /* Read a record from every input file except the active file.
1283 Returns true if successful, false if an I/O error occurred.
   MTF_ is the struct mtf_proc for the command. */
1285 mtf_read_nonactive_records (void *mtf_)
1287 struct mtf_proc *mtf = mtf_;
1288 struct mtf_file *iter, *next;
1291 for (iter = mtf->head; ok && iter != NULL; iter = next)
/* Files with a handle are read; one that yields no record is
   removed from the list. */
1294 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1295 if (!mtf_delete_file_in_place (mtf, &iter))
1301 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1302 if A == B, 1 if A > B.
   For a file whose input case is null, case C supplies the data. */
1304 mtf_compare_BY_values (struct mtf_proc *mtf,
1305 struct mtf_file *a, struct mtf_file *b,
1306 const struct ccase *c)
1308 const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1309 const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
/* NOTE(review): A and B are already dereferenced above, so this
   assertion cannot catch a null A or B before it is used. */
1310 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1311 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1314 /* Perform one iteration of steps 3...7 above.
1315 Returns true if successful, false if an I/O error occurred.
   C is the current case from the active file and is used for any
   file whose own input case is null.  MTF_ is the struct mtf_proc
   for the command. */
1317 mtf_processing (const struct ccase *c, void *mtf_)
1319 struct mtf_proc *mtf = mtf_;
1321 /* Do we need another record from the active file? */
1322 bool read_active_file;
1324 assert (mtf->head != NULL);
1325 if (mtf->head->type == MTF_TABLE)
1330 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1331 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1332 struct mtf_file *iter, *next;
1334 read_active_file = false;
1336 /* 3. Find the FILE input record(s) that have minimum BY
1337 values. Store all the values from these input records into
1338 the output record. */
1339 min_head = min_tail = mtf->head;
1340 max_head = max_tail = NULL;
/* Both the "min" and "max" lists are chained through next_min. */
1341 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1344 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1348 max_tail = max_tail->next_min = iter;
1350 max_head = max_tail = iter;
1353 min_tail = min_tail->next_min = iter;
/* A new minimum: the previous minimum list moves to the max list
   and ITER starts a fresh minimum list. */
1358 max_tail->next_min = min_head;
1359 max_tail = min_tail;
1363 max_head = min_head;
1364 max_tail = min_tail;
1366 min_head = min_tail = iter;
1370 /* 4. For every TABLE, read another record as long as the BY
1371 values on the TABLE's input record are less than the FILEs'
1372 BY values. If an exact match is found, store all the values
1373 from the TABLE input record into the output record. */
1374 for (; iter != NULL; iter = next)
1376 assert (iter->type == MTF_TABLE);
1381 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1385 max_tail = max_tail->next_min = iter;
1387 max_head = max_tail = iter;
1390 min_tail = min_tail->next_min = iter;
1393 if (iter->handle == NULL)
1395 if (any_reader_read (iter->reader, &iter->input))
1397 if (!mtf_delete_file_in_place (mtf, &iter))
1404 /* Next sequence number. */
1407 /* Store data to all the records we are using. */
1409 min_tail->next_min = NULL;
1410 for (iter = min_head; iter; iter = iter->next_min)
1414 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1416 struct variable *v = dict_get_var (iter->dict, i);
1417 struct variable *mv = get_master (v);
/* seq_nums marks master variables already stored during this
   iteration, so each one is written at most once. */
1419 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1421 const struct ccase *record
1422 = case_is_null (&iter->input) ? c : &iter->input;
1423 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1425 mtf->seq_nums[mv->index] = mtf->seq_num;
1426 if (v->type == NUMERIC)
1427 out->f = case_num (record, v->fv);
1429 memcpy (out->s, case_str (record, v->fv), v->width);
1432 if (iter->in_var != NULL)
1433 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
/* Using the active file's record means the next case has to come
   from the caller. */
1435 if (iter->type == MTF_FILE && iter->handle == NULL)
1436 read_active_file = true;
1439 /* Store missing values to all the records we're not
1440 using. */
1442 max_tail->next_min = NULL;
1443 for (iter = max_head; iter; iter = iter->next_min)
1447 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1449 struct variable *v = dict_get_var (iter->dict, i);
1450 struct variable *mv = get_master (v);
1452 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1454 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1455 mtf->seq_nums[mv->index] = mtf->seq_num;
1457 if (v->type == NUMERIC)
1460 memset (out->s, ' ', v->width);
1463 if (iter->in_var != NULL)
1464 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1467 /* 5. Write the output record. */
1468 casefile_append (mtf->output, &mtf->mtf_case);
1470 /* 6. Read another record from each input file FILE and TABLE
1471 that we stored values from above. If we come to the end of
1472 one of the input files, remove it from the list of input
1473 files. */
1474 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1476 next = iter->next_min;
1477 if (iter->reader != NULL
1478 && !any_reader_read (iter->reader, &iter->input))
1479 if (!mtf_delete_file_in_place (mtf, &iter))
/* 7. Repeat unless a new active-file case is needed or no FILE is
   left at the head of the list. */
1483 while (!read_active_file
1484 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1489 /* Merge the dictionary for file F into master dictionary M.
   M takes F's file label and documents where it has none of its
   own, and F's documents are appended to any that M already has.
   Variables missing from M are cloned into it; variables present
   in both must have the same width, and M's copy picks up value
   labels, missing values and a variable label that it lacks. */
1491 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1493 struct dictionary *d = f->dict;
1494 const char *d_docs, *m_docs;
1497 if (dict_get_label (m) == NULL)
1498 dict_set_label (m, dict_get_label (d));
1500 d_docs = dict_get_documents (d);
1501 m_docs = dict_get_documents (m);
1505 dict_set_documents (m, d_docs);
/* Both dictionaries have documents: concatenate M's then D's. */
1511 new_len = strlen (m_docs) + strlen (d_docs);
1512 new_docs = xmalloc (new_len + 1);
1513 strcpy (new_docs, m_docs);
1514 strcat (new_docs, d_docs);
1515 dict_set_documents (m, new_docs);
1520 for (i = 0; i < dict_get_var_cnt (d); i++)
1522 struct variable *dv = dict_get_var (d, i);
1523 struct variable *mv = dict_lookup_var (m, dv->name);
1525 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1530 if (mv->width != dv->width)
/* The active file has a null handle, so it is reported as "*",
   the same way cmd_match_files() reports it. */
1532 msg (SE, _("Variable %s in file %s (%s) has different "
1533 "type or width from the same variable in "
1534 "earlier file (%s)."),
1535 dv->name, f->handle ? fh_get_name (f->handle) : "*",
1536 var_type_description (dv), var_type_description (mv));
1540 if (dv->width == mv->width)
/* Value labels and missing values are copied only when the master
   variable has none of its own. */
1542 if (val_labs_count (dv->val_labs)
1543 && !val_labs_count (mv->val_labs))
1545 val_labs_destroy (mv->val_labs);
1546 mv->val_labs = val_labs_copy (dv->val_labs);
1548 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1549 mv_copy (&mv->miss, &dv->miss);
1552 if (dv->label && !mv->label)
1553 mv->label = xstrdup (dv->label);
/* Variable not yet in M: clone it under the same name. */
1556 mv = dict_clone_var_assert (m, dv, dv->name);
1562 /* Marks V's master variable as MASTER. */
1564 set_master (struct variable *v, struct variable *master)
1566 var_attach_aux (v, master, NULL);
1569 /* Returns the master variable corresponding to V,
1570 as set with set_master(). */
1571 static struct variable *
1572 get_master (struct variable *v)
1579 /* Case map.
1581 A case map copies data from a case that corresponds to one
1582 dictionary to a case that corresponds to a second dictionary
1583 derived from the first by, optionally, deleting, reordering,
1584 or renaming variables. (No new variables may be created.) */
1590 size_t value_cnt; /* Number of values in map. */
1591 int *map; /* For each destination index, the
1592 corresponding source index. */
1595 /* Prepares dictionary D for producing a case map. Afterward,
1596 the caller may delete, reorder, or rename variables within D
1597 at will before using finish_case_map() to produce the case
1598 map.
1600 Uses D's aux members, which must otherwise not be in use. */
1602 start_case_map (struct dictionary *d)
1604 size_t var_cnt = dict_get_var_cnt (d);
1607 for (i = 0; i < var_cnt; i++)
1609 struct variable *v = dict_get_var (d, i);
/* Each variable gets a heap-allocated int attached as aux data,
   with var_dtor_free as its destructor.  finish_case_map() later
   detaches it and reads it as the variable's source value
   index. */
1610 int *src_fv = xmalloc (sizeof *src_fv);
1612 var_attach_aux (v, src_fv, var_dtor_free);
1616 /* Produces a case map from dictionary D, which must have been
1617 previously prepared with start_case_map().
1619 Does not retain any reference to D, and clears the aux members
1620 set up by start_case_map().
1622 Returns the new case map, or a null pointer if no mapping is
1623 required (that is, no data has changed position). */
1624 static struct case_map *
1625 finish_case_map (struct dictionary *d)
1627 struct case_map *map;
1628 size_t var_cnt = dict_get_var_cnt (d);
/* The map has one entry per value in D's current layout. */
1632 map = xmalloc (sizeof *map);
1633 map->value_cnt = dict_get_next_value_idx (d);
1634 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1635 for (i = 0; i < map->value_cnt; i++)
1639 for (i = 0; i < var_cnt; i++)
1641 struct variable *v = dict_get_var (d, i);
1642 int *src_fv = (int *) var_detach_aux (v);
/* A variable whose current value index differs from its recorded
   source index has moved. */
1645 if (v->fv != *src_fv)
/* Map each of the variable's NV values from its source slot to its
   destination slot; every destination slot must still be unset
   (-1). */
1648 for (idx = 0; idx < v->nv; idx++)
1650 int src_idx = *src_fv + idx;
1651 int dst_idx = v->fv + idx;
1653 assert (map->map[dst_idx] == -1);
1654 map->map[dst_idx] = src_idx;
1661 destroy_case_map (map);
/* Scan back over trailing destination slots that have no source. */
1665 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1671 /* Maps from SRC to DST, applying case map MAP.
   MAP, SRC and DST must all be non-null, and SRC and DST must be
   different cases. */
1673 map_case (const struct case_map *map,
1674 const struct ccase *src, struct ccase *dst)
1678 assert (map != NULL);
1679 assert (src != NULL);
1680 assert (dst != NULL);
1681 assert (src != dst);
/* For each destination index, copy the value found at the source
   index recorded in the map.  Entries of -1 (destination slots
   without a source, see finish_case_map()) are presumably
   skipped -- confirm. */
1683 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1685 int src_idx = map->map[dst_idx];
1687 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1691 /* Destroys case map MAP. */
1693 destroy_case_map (struct case_map *map)