1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
21 #include <libpspp/message.h>
23 #include <libpspp/alloc.h>
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case.h>
27 #include <language/command.h>
28 #include <libpspp/compiler.h>
29 #include <data/dictionary.h>
30 #include <libpspp/message.h>
31 #include <language/data-io/file-handle.h>
32 #include <libpspp/hash.h>
33 #include <language/lexer/lexer.h>
34 #include <libpspp/misc.h>
35 #include <data/por-file-writer.h>
36 #include <data/settings.h>
37 #include <data/sys-file-writer.h>
38 #include <libpspp/str.h>
39 #include <data/value-labels.h>
40 #include <data/variable.h>
41 #include <procedure.h>
44 #define _(msgid) gettext (msgid)
46 /* Rearranging and reducing a dictionary. */
47 static void start_case_map (struct dictionary *);
48 static struct case_map *finish_case_map (struct dictionary *);
49 static void map_case (const struct case_map *,
50 const struct ccase *, struct ccase *);
51 static void destroy_case_map (struct case_map *);
53 static bool parse_dict_trim (struct dictionary *);
55 /* Reading system and portable files. */
57 /* Type of command. */
64 /* Case reader input program. */
65 struct case_reader_pgm
67 struct any_reader *reader; /* File reader. */
68 struct case_map *map; /* Map from file dict to active file dict. */
69 struct ccase bounce; /* Bounce buffer. */
72 static const struct case_source_class case_reader_source_class;
74 static void case_reader_pgm_free (struct case_reader_pgm *);
76 /* Parses a GET or IMPORT command. */
78 parse_read_command (enum reader_command type)
80 struct case_reader_pgm *pgm = NULL;
81 struct file_handle *fh = NULL;
82 struct dictionary *dict = NULL;
88 if (lex_match_id ("FILE") || token == T_STRING)
92 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
96 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
100 if (lex_match_id ("COMM"))
102 else if (lex_match_id ("TAPE"))
106 lex_error (_("expecting COMM or TAPE"));
116 lex_sbc_missing ("FILE");
120 discard_variables ();
122 pgm = xmalloc (sizeof *pgm);
123 pgm->reader = any_reader_open (fh, &dict);
125 case_nullify (&pgm->bounce);
126 if (pgm->reader == NULL)
129 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
131 start_case_map (dict);
136 if (!parse_dict_trim (dict))
140 pgm->map = finish_case_map (dict);
142 dict_destroy (default_dict);
145 vfm_source = create_case_source (&case_reader_source_class, pgm);
150 case_reader_pgm_free (pgm);
153 return CMD_CASCADING_FAILURE;
156 /* Frees a struct case_reader_pgm. */
158 case_reader_pgm_free (struct case_reader_pgm *pgm)
162 any_reader_close (pgm->reader);
163 destroy_case_map (pgm->map);
164 case_destroy (&pgm->bounce);
169 /* Clears internal state related to case reader input procedure. */
171 case_reader_source_destroy (struct case_source *source)
173 struct case_reader_pgm *pgm = source->aux;
174 case_reader_pgm_free (pgm);
177 /* Reads all the cases from the data file into C and passes them
178 to WRITE_CASE one by one, passing WC_DATA.
179 Returns true if successful, false if an I/O error occurred. */
181 case_reader_source_read (struct case_source *source,
183 write_case_func *write_case, write_case_data wc_data)
185 struct case_reader_pgm *pgm = source->aux;
191 if (pgm->map == NULL)
192 got_case = any_reader_read (pgm->reader, c);
195 got_case = any_reader_read (pgm->reader, &pgm->bounce);
197 map_case (pgm->map, &pgm->bounce, c);
202 ok = write_case (wc_data);
206 return ok && !any_reader_error (pgm->reader);
209 static const struct case_source_class case_reader_source_class =
213 case_reader_source_read,
214 case_reader_source_destroy,
221 return parse_read_command (GET_CMD);
228 return parse_read_command (IMPORT_CMD);
231 /* Writing system and portable files. */
233 /* Type of output file. */
236 SYSFILE_WRITER, /* System file. */
237 PORFILE_WRITER /* Portable file. */
240 /* Type of a command. */
243 XFORM_CMD, /* Transformation. */
244 PROC_CMD /* Procedure. */
247 /* File writer plus a case map. */
250 struct any_writer *writer; /* File writer. */
251 struct case_map *map; /* Map to output file dictionary
252 (null pointer for identity mapping). */
253 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
258 case_writer_destroy (struct case_writer *aw)
263 ok = any_writer_close (aw->writer);
264 destroy_case_map (aw->map);
265 case_destroy (&aw->bounce);
271 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
272 WRITER_TYPE identifies the type of file to write,
273 and COMMAND_TYPE identifies the type of command.
275 On success, returns a writer.
For procedures only, sets *RETAIN_UNSELECTED to true if cases
that would otherwise be excluded by FILTER or USE should be
written to the output file anyway.

On failure, returns a null pointer. */
281 static struct case_writer *
282 parse_write_command (enum writer_type writer_type,
283 enum command_type command_type,
284 bool *retain_unselected)
287 struct file_handle *handle; /* Output file. */
288 struct dictionary *dict; /* Dictionary for output file. */
289 struct case_writer *aw; /* Writer. */
291 /* Common options. */
292 bool print_map; /* Print map? TODO. */
293 bool print_short_names; /* Print long-to-short name map. TODO. */
294 struct sfm_write_options sysfile_opts;
295 struct pfm_write_options porfile_opts;
297 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
298 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
299 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
301 if (command_type == PROC_CMD)
302 *retain_unselected = true;
305 dict = dict_clone (default_dict);
306 aw = xmalloc (sizeof *aw);
309 case_nullify (&aw->bounce);
311 print_short_names = false;
312 sysfile_opts = sfm_writer_default_options ();
313 porfile_opts = pfm_writer_default_options ();
315 start_case_map (dict);
316 dict_delete_scratch_vars (dict);
321 if (lex_match_id ("OUTFILE"))
325 lex_sbc_only_once ("OUTFILE");
331 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
335 else if (lex_match_id ("NAMES"))
336 print_short_names = true;
337 else if (lex_match_id ("PERMISSIONS"))
342 if (lex_match_id ("READONLY"))
344 else if (lex_match_id ("WRITEABLE"))
348 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
351 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
353 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
356 if (lex_match_id ("RETAIN"))
357 *retain_unselected = true;
358 else if (lex_match_id ("DELETE"))
359 *retain_unselected = false;
362 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
366 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
367 sysfile_opts.compress = true;
368 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
369 sysfile_opts.compress = false;
370 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
373 if (!lex_force_int ())
375 sysfile_opts.version = lex_integer ();
378 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
381 if (lex_match_id ("COMMUNICATIONS"))
382 porfile_opts.type = PFM_COMM;
383 else if (lex_match_id ("TAPE"))
384 porfile_opts.type = PFM_TAPE;
387 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
391 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
394 if (!lex_force_int ())
396 porfile_opts.digits = lex_integer ();
399 else if (!parse_dict_trim (dict))
402 if (!lex_match ('/'))
405 if (lex_end_of_command () != CMD_SUCCESS)
410 lex_sbc_missing ("OUTFILE");
414 dict_compact_values (dict);
415 aw->map = finish_case_map (dict);
417 case_create (&aw->bounce, dict_get_next_value_idx (dict));
419 if (fh_get_referent (handle) == FH_REF_FILE)
424 aw->writer = any_writer_from_sfm_writer (
425 sfm_open_writer (handle, dict, sysfile_opts));
428 aw->writer = any_writer_from_pfm_writer (
429 pfm_open_writer (handle, dict, porfile_opts));
434 aw->writer = any_writer_open (handle, dict);
440 case_writer_destroy (aw);
445 /* Writes case C to writer AW. */
447 case_writer_write_case (struct case_writer *aw, struct ccase *c)
451 map_case (aw->map, c, &aw->bounce);
454 return any_writer_write (aw->writer, c);
457 /* SAVE and EXPORT. */
459 static bool output_proc (struct ccase *, void *);
461 /* Parses and performs the SAVE or EXPORT procedure. */
463 parse_output_proc (enum writer_type writer_type)
465 bool retain_unselected;
466 struct variable *saved_filter_variable;
467 struct case_writer *aw;
470 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
472 return CMD_CASCADING_FAILURE;
474 saved_filter_variable = dict_get_filter (default_dict);
475 if (retain_unselected)
476 dict_set_filter (default_dict, NULL);
477 ok = procedure (output_proc, aw);
478 dict_set_filter (default_dict, saved_filter_variable);
480 case_writer_destroy (aw);
481 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
484 /* Writes case C to file. */
486 output_proc (struct ccase *c, void *aw_)
488 struct case_writer *aw = aw_;
489 return case_writer_write_case (aw, c);
495 return parse_output_proc (SYSFILE_WRITER);
501 return parse_output_proc (PORFILE_WRITER);
504 /* XSAVE and XEXPORT. */
506 /* Transformation. */
509 struct case_writer *aw; /* Writer. */
512 static trns_proc_func output_trns_proc;
513 static trns_free_func output_trns_free;
515 /* Parses the XSAVE or XEXPORT transformation command. */
517 parse_output_trns (enum writer_type writer_type)
519 struct output_trns *t = xmalloc (sizeof *t);
520 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
524 return CMD_CASCADING_FAILURE;
527 add_transformation (output_trns_proc, output_trns_free, t);
531 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
533 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
535 struct output_trns *t = trns_;
536 case_writer_write_case (t->aw, c);
537 return TRNS_CONTINUE;
540 /* Frees an XSAVE or XEXPORT transformation.
541 Returns true if successful, false if an I/O error occurred. */
543 output_trns_free (void *trns_)
545 struct output_trns *t = trns_;
550 ok = case_writer_destroy (t->aw);
560 return parse_output_trns (SYSFILE_WRITER);
563 /* XEXPORT command. */
567 return parse_output_trns (PORFILE_WRITER);
570 static bool rename_variables (struct dictionary *dict);
571 static bool drop_variables (struct dictionary *dict);
572 static bool keep_variables (struct dictionary *dict);
574 /* Commands that read and write system files share a great deal
575 of common syntactic structure for rearranging and dropping
576 variables. This function parses this syntax and modifies DICT
577 appropriately. Returns true on success, false on failure. */
579 parse_dict_trim (struct dictionary *dict)
581 if (lex_match_id ("MAP"))
586 else if (lex_match_id ("DROP"))
587 return drop_variables (dict);
588 else if (lex_match_id ("KEEP"))
589 return keep_variables (dict);
590 else if (lex_match_id ("RENAME"))
591 return rename_variables (dict);
594 lex_error (_("expecting a valid subcommand"));
599 /* Parses and performs the RENAME subcommand of GET and SAVE. */
601 rename_variables (struct dictionary *dict)
619 v = parse_dict_variable (dict);
622 if (!lex_force_match ('=')
625 if (dict_lookup_var (dict, tokid) != NULL)
627 msg (SE, _("Cannot rename %s as %s because there already exists "
628 "a variable named %s. To rename variables with "
629 "overlapping names, use a single RENAME subcommand "
630 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
631 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
635 dict_rename_var (dict, v, tokid);
644 while (lex_match ('('))
648 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
650 if (!lex_match ('='))
652 msg (SE, _("`=' expected after variable list."));
655 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
659 msg (SE, _("Number of variables on left side of `=' (%d) does not "
660 "match number of variables on right side (%d), in "
661 "parenthesized group %d of RENAME subcommand."),
662 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
665 if (!lex_force_match (')'))
670 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
672 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
678 for (i = 0; i < nn; i++)
686 /* Parses and performs the DROP subcommand of GET and SAVE.
687 Returns true if successful, false on failure.*/
689 drop_variables (struct dictionary *dict)
695 if (!parse_variables (dict, &v, &nv, PV_NONE))
697 dict_delete_vars (dict, v, nv);
700 if (dict_get_var_cnt (dict) == 0)
702 msg (SE, _("Cannot DROP all variables from dictionary."));
708 /* Parses and performs the KEEP subcommand of GET and SAVE.
709 Returns true if successful, false on failure.*/
711 keep_variables (struct dictionary *dict)
718 if (!parse_variables (dict, &v, &nv, PV_NONE))
721 /* Move the specified variables to the beginning. */
722 dict_reorder_vars (dict, v, nv);
724 /* Delete the remaining variables. */
725 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
726 for (i = nv; i < dict_get_var_cnt (dict); i++)
727 v[i - nv] = dict_get_var (dict, i);
728 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
739 MTF_FILE, /* Specified on FILE= subcommand. */
740 MTF_TABLE /* Specified on TABLE= subcommand. */
743 /* One of the files on MATCH FILES. */
746 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
747 struct mtf_file *next_min; /* Next in the chain of minimums. */
749 int type; /* One of MTF_*. */
750 struct variable **by; /* List of BY variables for this file. */
751 struct file_handle *handle; /* File handle. */
752 struct any_reader *reader; /* File reader. */
753 struct dictionary *dict; /* Dictionary from system file. */
756 char *in_name; /* Variable name. */
757 struct variable *in_var; /* Variable (in master dictionary). */
759 struct ccase input; /* Input record. */
762 /* MATCH FILES procedure. */
765 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
766 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
768 bool ok; /* False if I/O error occurs. */
770 size_t by_cnt; /* Number of variables on BY subcommand. */
772 /* Names of FIRST, LAST variables. */
773 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
775 struct dictionary *dict; /* Dictionary of output file. */
776 struct case_sink *sink; /* Sink to receive output. */
777 struct ccase mtf_case; /* Case used for output. */
779 unsigned seq_num; /* Have we initialized this variable? */
780 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
783 static bool mtf_free (struct mtf_proc *);
784 static bool mtf_close_file (struct mtf_file *);
785 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
786 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
788 static bool mtf_read_nonactive_records (void *);
789 static bool mtf_processing_finish (void *);
790 static bool mtf_processing (struct ccase *, void *);
792 static char *var_type_description (struct variable *);
794 static void set_master (struct variable *, struct variable *master);
795 static struct variable *get_master (struct variable *);
797 /* Parse and execute the MATCH FILES command. */
799 cmd_match_files (void)
802 struct mtf_file *first_table = NULL;
803 struct mtf_file *iter;
805 bool used_active_file = false;
806 bool saw_table = false;
811 mtf.head = mtf.tail = NULL;
815 mtf.dict = dict_create ();
817 case_nullify (&mtf.mtf_case);
820 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
824 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
826 struct mtf_file *file = xmalloc (sizeof *file);
828 if (lex_match_id ("FILE"))
829 file->type = MTF_FILE;
830 else if (lex_match_id ("TABLE"))
832 file->type = MTF_TABLE;
843 file->in_name = NULL;
845 case_nullify (&file->input);
847 /* FILEs go first, then TABLEs. */
848 if (file->type == MTF_TABLE || first_table == NULL)
851 file->prev = mtf.tail;
853 mtf.tail->next = file;
855 if (mtf.head == NULL)
857 if (file->type == MTF_TABLE && first_table == NULL)
862 assert (file->type == MTF_FILE);
863 file->next = first_table;
864 file->prev = first_table->prev;
865 if (first_table->prev)
866 first_table->prev->next = file;
869 first_table->prev = file;
877 if (used_active_file)
879 msg (SE, _("The active file may not be specified more "
883 used_active_file = true;
885 if (vfm_source == NULL)
887 msg (SE, _("Cannot specify the active file since no active "
888 "file has been defined."));
895 _("MATCH FILES may not be used after TEMPORARY when "
896 "the active file is an input source. "
897 "Temporary transformations will be made permanent."));
901 file->dict = default_dict;
905 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
906 if (file->handle == NULL)
909 file->reader = any_reader_open (file->handle, &file->dict);
910 if (file->reader == NULL)
913 case_create (&file->input, dict_get_next_value_idx (file->dict));
916 while (lex_match ('/'))
917 if (lex_match_id ("RENAME"))
919 if (!rename_variables (file->dict))
922 else if (lex_match_id ("IN"))
931 if (file->in_name != NULL)
933 msg (SE, _("Multiple IN subcommands for a single FILE or "
937 file->in_name = xstrdup (tokid);
942 mtf_merge_dictionary (mtf.dict, file);
947 if (lex_match (T_BY))
949 struct variable **by;
953 msg (SE, _("BY may appear at most once."));
958 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
959 PV_NO_DUPLICATE | PV_NO_SCRATCH))
962 for (iter = mtf.head; iter != NULL; iter = iter->next)
966 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
968 for (i = 0; i < mtf.by_cnt; i++)
970 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
971 if (iter->by[i] == NULL)
973 msg (SE, _("File %s lacks BY variable %s."),
974 iter->handle ? fh_get_name (iter->handle) : "*",
983 else if (lex_match_id ("FIRST"))
985 if (mtf.first[0] != '\0')
987 msg (SE, _("FIRST may appear at most once."));
992 if (!lex_force_id ())
994 strcpy (mtf.first, tokid);
997 else if (lex_match_id ("LAST"))
999 if (mtf.last[0] != '\0')
1001 msg (SE, _("LAST may appear at most once."));
1006 if (!lex_force_id ())
1008 strcpy (mtf.last, tokid);
1011 else if (lex_match_id ("MAP"))
1015 else if (lex_match_id ("DROP"))
1017 if (!drop_variables (mtf.dict))
1020 else if (lex_match_id ("KEEP"))
1022 if (!keep_variables (mtf.dict))
1031 if (!lex_match ('/') && token != '.')
1033 lex_end_of_command ();
1038 if (mtf.by_cnt == 0)
1042 msg (SE, _("BY is required when TABLE is specified."));
1047 msg (SE, _("BY is required when IN is specified."));
/* Set up mapping from each file's variables to master variables. */
1054 for (iter = mtf.head; iter != NULL; iter = iter->next)
1056 struct dictionary *d = iter->dict;
1059 for (i = 0; i < dict_get_var_cnt (d); i++)
1061 struct variable *v = dict_get_var (d, i);
1062 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1068 /* Add IN variables to master dictionary. */
1069 for (iter = mtf.head; iter != NULL; iter = iter->next)
1070 if (iter->in_name != NULL)
1072 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1073 if (iter->in_var == NULL)
1075 msg (SE, _("IN variable name %s duplicates an "
1076 "existing variable name."),
1077 iter->in_var->name);
1080 iter->in_var->print = iter->in_var->write
1081 = make_output_format (FMT_F, 1, 0);
/* MATCH FILES performs an n-way merge on all its input files.
   The abstract algorithm is:

   1. Read one input record from every input FILE.

   2. If no FILEs are left, stop.  Otherwise, proceed to step 3.

   3. Find the FILE input record(s) that have minimum BY
   values.  Store all the values from these input records into
   the output record.

   4. For every TABLE, read another record as long as the BY values
   on the TABLE's input record are less than the FILEs' BY values.
   If an exact match is found, store all the values from the TABLE
   input record into the output record.

   5. Write the output record.

   6. Read another record from each input file FILE and TABLE that
   we stored values from above.  If we come to the end of one of the
   input files, remove it from the list of input files.

   7. Repeat from step 2.

   Unfortunately, this algorithm can't be implemented in a
   straightforward way because there's no function to read a
   record from the active file.  Instead, it has to be written
   so that active file records are pushed into it one at a time,
   through the mtf_processing() callback passed to procedure().

   FIXME: For merging large numbers of files (more than 10?) a
   better algorithm would use a heap for finding minimum
   values. */
1117 if (!used_active_file)
1118 discard_variables ();
1120 dict_compact_values (mtf.dict);
1121 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1122 if (mtf.sink->class->open != NULL)
1123 mtf.sink->class->open (mtf.sink);
1125 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1126 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1128 if (!mtf_read_nonactive_records (&mtf))
1131 if (used_active_file)
1132 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1134 ok = mtf_processing_finish (&mtf);
1136 free_case_source (vfm_source);
1139 dict_destroy (default_dict);
1140 default_dict = mtf.dict;
1142 vfm_source = mtf.sink->class->make_source (mtf.sink);
1143 free_case_sink (mtf.sink);
1145 if (!mtf_free (&mtf))
1147 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1151 return CMD_CASCADING_FAILURE;
1154 /* Repeats 2...7 an arbitrary number of times. */
1156 mtf_processing_finish (void *mtf_)
1158 struct mtf_proc *mtf = mtf_;
1159 struct mtf_file *iter;
1161 /* Find the active file and delete it. */
1162 for (iter = mtf->head; iter; iter = iter->next)
1163 if (iter->handle == NULL)
1165 if (!mtf_delete_file_in_place (mtf, &iter))
1170 while (mtf->head && mtf->head->type == MTF_FILE)
1171 if (!mtf_processing (NULL, mtf))
/* Return a string in a static buffer describing V's variable type and
   width. */
1180 var_type_description (struct variable *v)
1182 static char buf[2][32];
1189 if (v->type == NUMERIC)
1190 strcpy (s, "numeric");
1193 assert (v->type == ALPHA);
1194 sprintf (s, "string with width %d", v->width);
1199 /* Closes FILE and frees its associated data.
1200 Returns true if successful, false if an I/O error
1201 occurred on FILE. */
1203 mtf_close_file (struct mtf_file *file)
1205 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1207 any_reader_close (file->reader);
1208 if (file->handle != NULL)
1209 dict_destroy (file->dict);
1210 case_destroy (&file->input);
1211 free (file->in_name);
/* Free all the data for the MATCH FILES procedure.
   Returns true if successful, false if an I/O error
   occurred. */
1220 mtf_free (struct mtf_proc *mtf)
1222 struct mtf_file *iter, *next;
1225 for (iter = mtf->head; iter; iter = next)
1228 assert (iter->dict != mtf->dict);
1229 if (!mtf_close_file (iter))
1234 dict_destroy (mtf->dict);
1235 case_destroy (&mtf->mtf_case);
1236 free (mtf->seq_nums);
/* Remove *FILE from the mtf_file chain.  Make *FILE point to the next
   file in the chain, or to NULL if it was the last in the chain.
   Returns true if successful, false if an I/O error occurred. */
1245 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1247 struct mtf_file *f = *file;
1251 f->prev->next = f->next;
1253 f->next->prev = f->prev;
1255 mtf->head = f->next;
1257 mtf->tail = f->prev;
1260 if (f->in_var != NULL)
1261 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1262 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1264 struct variable *v = dict_get_var (f->dict, i);
1265 struct variable *mv = get_master (v);
1268 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1270 if (v->type == NUMERIC)
1273 memset (out->s, ' ', v->width);
1277 return mtf_close_file (f);
1280 /* Read a record from every input file except the active file.
1281 Returns true if successful, false if an I/O error occurred. */
1283 mtf_read_nonactive_records (void *mtf_)
1285 struct mtf_proc *mtf = mtf_;
1286 struct mtf_file *iter, *next;
1289 for (iter = mtf->head; ok && iter != NULL; iter = next)
1292 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1293 if (!mtf_delete_file_in_place (mtf, &iter))
1299 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1300 if A == B, 1 if A > B. */
1302 mtf_compare_BY_values (struct mtf_proc *mtf,
1303 struct mtf_file *a, struct mtf_file *b,
1306 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1307 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1308 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1309 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1312 /* Perform one iteration of steps 3...7 above.
1313 Returns true if successful, false if an I/O error occurred. */
1315 mtf_processing (struct ccase *c, void *mtf_)
1317 struct mtf_proc *mtf = mtf_;
1319 /* Do we need another record from the active file? */
1320 bool read_active_file;
1322 assert (mtf->head != NULL);
1323 if (mtf->head->type == MTF_TABLE)
1328 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1329 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1330 struct mtf_file *iter, *next;
1332 read_active_file = false;
1334 /* 3. Find the FILE input record(s) that have minimum BY
1335 values. Store all the values from these input records into
1336 the output record. */
1337 min_head = min_tail = mtf->head;
1338 max_head = max_tail = NULL;
1339 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1342 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1346 max_tail = max_tail->next_min = iter;
1348 max_head = max_tail = iter;
1351 min_tail = min_tail->next_min = iter;
1356 max_tail->next_min = min_head;
1357 max_tail = min_tail;
1361 max_head = min_head;
1362 max_tail = min_tail;
1364 min_head = min_tail = iter;
1368 /* 4. For every TABLE, read another record as long as the BY
1369 values on the TABLE's input record are less than the FILEs'
1370 BY values. If an exact match is found, store all the values
1371 from the TABLE input record into the output record. */
1372 for (; iter != NULL; iter = next)
1374 assert (iter->type == MTF_TABLE);
1379 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1383 max_tail = max_tail->next_min = iter;
1385 max_head = max_tail = iter;
1388 min_tail = min_tail->next_min = iter;
1391 if (iter->handle == NULL)
1393 if (any_reader_read (iter->reader, &iter->input))
1395 if (!mtf_delete_file_in_place (mtf, &iter))
1402 /* Next sequence number. */
1405 /* Store data to all the records we are using. */
1407 min_tail->next_min = NULL;
1408 for (iter = min_head; iter; iter = iter->next_min)
1412 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1414 struct variable *v = dict_get_var (iter->dict, i);
1415 struct variable *mv = get_master (v);
1417 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1419 struct ccase *record
1420 = case_is_null (&iter->input) ? c : &iter->input;
1421 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1423 mtf->seq_nums[mv->index] = mtf->seq_num;
1424 if (v->type == NUMERIC)
1425 out->f = case_num (record, v->fv);
1427 memcpy (out->s, case_str (record, v->fv), v->width);
1430 if (iter->in_var != NULL)
1431 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1433 if (iter->type == MTF_FILE && iter->handle == NULL)
1434 read_active_file = true;
/* Store missing values to all the records we're not
   using. */
1440 max_tail->next_min = NULL;
1441 for (iter = max_head; iter; iter = iter->next_min)
1445 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1447 struct variable *v = dict_get_var (iter->dict, i);
1448 struct variable *mv = get_master (v);
1450 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1452 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1453 mtf->seq_nums[mv->index] = mtf->seq_num;
1455 if (v->type == NUMERIC)
1458 memset (out->s, ' ', v->width);
1461 if (iter->in_var != NULL)
1462 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1465 /* 5. Write the output record. */
1466 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
/* 6. Read another record from each input file FILE and TABLE
   that we stored values from above.  If we come to the end of
   one of the input files, remove it from the list of input
   files. */
1472 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1474 next = iter->next_min;
1475 if (iter->reader != NULL
1476 && !any_reader_read (iter->reader, &iter->input))
1477 if (!mtf_delete_file_in_place (mtf, &iter))
1481 while (!read_active_file
1482 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1487 /* Merge the dictionary for file F into master dictionary M. */
1489 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1491 struct dictionary *d = f->dict;
1492 const char *d_docs, *m_docs;
1495 if (dict_get_label (m) == NULL)
1496 dict_set_label (m, dict_get_label (d));
1498 d_docs = dict_get_documents (d);
1499 m_docs = dict_get_documents (m);
1503 dict_set_documents (m, d_docs);
1509 new_len = strlen (m_docs) + strlen (d_docs);
1510 new_docs = xmalloc (new_len + 1);
1511 strcpy (new_docs, m_docs);
1512 strcat (new_docs, d_docs);
1513 dict_set_documents (m, new_docs);
1518 for (i = 0; i < dict_get_var_cnt (d); i++)
1520 struct variable *dv = dict_get_var (d, i);
1521 struct variable *mv = dict_lookup_var (m, dv->name);
1523 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1528 if (mv->width != dv->width)
1530 msg (SE, _("Variable %s in file %s (%s) has different "
1531 "type or width from the same variable in "
1532 "earlier file (%s)."),
1533 dv->name, fh_get_name (f->handle),
1534 var_type_description (dv), var_type_description (mv));
1538 if (dv->width == mv->width)
1540 if (val_labs_count (dv->val_labs)
1541 && !val_labs_count (mv->val_labs))
1542 mv->val_labs = val_labs_copy (dv->val_labs);
1543 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1544 mv_copy (&mv->miss, &dv->miss);
1547 if (dv->label && !mv->label)
1548 mv->label = xstrdup (dv->label);
1551 mv = dict_clone_var_assert (m, dv, dv->name);
1557 /* Marks V's master variable as MASTER. */
1559 set_master (struct variable *v, struct variable *master)
1561 var_attach_aux (v, master, NULL);
1564 /* Returns the master variable corresponding to V,
1565 as set with set_master(). */
1566 static struct variable *
1567 get_master (struct variable *v)
/* Case map.

   A case map copies data from a case that corresponds to one
   dictionary to a case that corresponds to a second dictionary
   derived from the first by, optionally, deleting, reordering,
   or renaming variables.  (No new variables may be created.) */
1585 size_t value_cnt; /* Number of values in map. */
1586 int *map; /* For each destination index, the
1587 corresponding source index. */
/* Prepares dictionary D for producing a case map.  Afterward,
   the caller may delete, reorder, or rename variables within D
   at will before using finish_case_map() to produce the case
   map.

   Uses D's aux members, which must otherwise not be in use. */
1597 start_case_map (struct dictionary *d)
1599 size_t var_cnt = dict_get_var_cnt (d);
1602 for (i = 0; i < var_cnt; i++)
1604 struct variable *v = dict_get_var (d, i);
1605 int *src_fv = xmalloc (sizeof *src_fv);
1607 var_attach_aux (v, src_fv, var_dtor_free);
1611 /* Produces a case map from dictionary D, which must have been
1612 previously prepared with start_case_map().
1614 Does not retain any reference to D, and clears the aux members
1615 set up by start_case_map().
1617 Returns the new case map, or a null pointer if no mapping is
1618 required (that is, no data has changed position). */
1619 static struct case_map *
1620 finish_case_map (struct dictionary *d)
1622 struct case_map *map;
1623 size_t var_cnt = dict_get_var_cnt (d);
1627 map = xmalloc (sizeof *map);
1628 map->value_cnt = dict_get_next_value_idx (d);
1629 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1630 for (i = 0; i < map->value_cnt; i++)
1634 for (i = 0; i < var_cnt; i++)
1636 struct variable *v = dict_get_var (d, i);
1637 int *src_fv = (int *) var_detach_aux (v);
1640 if (v->fv != *src_fv)
1643 for (idx = 0; idx < v->nv; idx++)
1645 int src_idx = *src_fv + idx;
1646 int dst_idx = v->fv + idx;
1648 assert (map->map[dst_idx] == -1);
1649 map->map[dst_idx] = src_idx;
1656 destroy_case_map (map);
1660 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1666 /* Maps from SRC to DST, applying case map MAP. */
1668 map_case (const struct case_map *map,
1669 const struct ccase *src, struct ccase *dst)
1673 assert (map != NULL);
1674 assert (src != NULL);
1675 assert (dst != NULL);
1676 assert (src != dst);
1678 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1680 int src_idx = map->map[dst_idx];
1682 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1686 /* Destroys case map MAP. */
1688 destroy_case_map (struct case_map *map)