1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21 #include <libpspp/message.h>
23 #include <libpspp/alloc.h>
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case.h>
27 #include <language/command.h>
28 #include <libpspp/compiler.h>
29 #include <data/dictionary.h>
30 #include <libpspp/message.h>
31 #include <language/data-io/file-handle.h>
32 #include <libpspp/hash.h>
33 #include <language/lexer/lexer.h>
34 #include <libpspp/misc.h>
35 #include <data/por-file-writer.h>
36 #include <data/settings.h>
37 #include <data/sys-file-writer.h>
38 #include <libpspp/str.h>
39 #include <data/value-labels.h>
40 #include <data/variable.h>
41 #include <procedure.h>
44 #define _(msgid) gettext (msgid)
46 /* Rearranging and reducing a dictionary. */
47 static void start_case_map (struct dictionary *);
48 static struct case_map *finish_case_map (struct dictionary *);
49 static void map_case (const struct case_map *,
50 const struct ccase *, struct ccase *);
51 static void destroy_case_map (struct case_map *);
53 static bool parse_dict_trim (struct dictionary *);
55 /* Reading system and portable files. */
57 /* Type of command. */
64 /* Case reader input program: the state behind the case source that
   parse_read_command installs.  When MAP is null, cases are read
   straight into the destination case; otherwise they are read into
   BOUNCE and copied across through MAP. */
65 struct case_reader_pgm
67 struct any_reader *reader; /* File reader. */
68 struct case_map *map; /* Map from file dict to active file dict. */
69 struct ccase bounce; /* Bounce buffer. */
72 static const struct case_source_class case_reader_source_class;
74 static void case_reader_pgm_free (struct case_reader_pgm *);
76 /* Parses a GET or IMPORT command; TYPE selects which of the two.
   Opens the file named on FILE, lets the dictionary-trimming
   subcommands reshape the file's dictionary, and installs a case
   source that reads from the file.  The cleanup path visible at the
   end frees PGM and returns CMD_CASCADING_FAILURE. */
78 parse_read_command (enum reader_command type)
80 struct case_reader_pgm *pgm = NULL;
81 struct file_handle *fh = NULL;
82 struct dictionary *dict = NULL;
/* The FILE keyword itself is optional when a string token follows. */
88 if (lex_match_id ("FILE") || token == T_STRING)
92 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
/* TYPE is recognized only for IMPORT; it accepts COMM or TAPE. */
96 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
100 if (lex_match_id ("COMM"))
102 else if (lex_match_id ("TAPE"))
106 lex_error (_("expecting COMM or TAPE"));
116 lex_sbc_missing ("FILE");
120 discard_variables ();
/* NOTE(review): case_reader_pgm_free() hands pgm->map to
   destroy_case_map(); confirm pgm->map is initialized before the
   failure exit taken when any_reader_open() returns null. */
122 pgm = xmalloc (sizeof *pgm);
123 pgm->reader = any_reader_open (fh, &dict);
/* Nullify the bounce case first, presumably so that cleanup can
   destroy it even if it is never created. */
125 case_nullify (&pgm->bounce);
126 if (pgm->reader == NULL)
129 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
/* Record where each value lives now, let the subcommands rearrange
   DICT, then derive the map (null when nothing moved). */
131 start_case_map (dict);
136 if (!parse_dict_trim (dict))
140 pgm->map = finish_case_map (dict);
142 dict_destroy (default_dict);
145 vfm_source = create_case_source (&case_reader_source_class, pgm);
/* Failure exit: release whatever was set up and report the error. */
150 case_reader_pgm_free (pgm);
153 return CMD_CASCADING_FAILURE;
156 /* Frees a struct case_reader_pgm: closes its reader, destroys its
   case map, and destroys its bounce case.
   NOTE(review): parse_read_command can reach its cleanup while PGM is
   still null; confirm a null guard surrounds the calls below. */
158 case_reader_pgm_free (struct case_reader_pgm *pgm)
162 any_reader_close (pgm->reader);
163 destroy_case_map (pgm->map);
164 case_destroy (&pgm->bounce);
169 /* Clears internal state related to case reader input procedure.
   SOURCE's aux member is treated as the struct case_reader_pgm and is
   released with case_reader_pgm_free(). */
171 case_reader_source_destroy (struct case_source *source)
173 struct case_reader_pgm *pgm = source->aux;
174 case_reader_pgm_free (pgm);
177 /* Reads all the cases from the data file into C and passes them
178 to WRITE_CASE one by one, passing WC_DATA.
179 Returns true if successful, false if an I/O error occurred. */
181 case_reader_source_read (struct case_source *source,
183 write_case_func *write_case, write_case_data wc_data)
185 struct case_reader_pgm *pgm = source->aux;
/* Without a case map, read straight into C. */
191 if (pgm->map == NULL)
192 got_case = any_reader_read (pgm->reader, c);
/* With a case map, read into the bounce case and copy the values
   across through the map. */
195 got_case = any_reader_read (pgm->reader, &pgm->bounce);
197 map_case (pgm->map, &pgm->bounce, c);
202 ok = write_case (wc_data);
/* A reader error fails the whole read even when every write_case
   call succeeded. */
206 return ok && !any_reader_error (pgm->reader);
/* Callback table for the case source that parse_read_command stores
   in vfm_source. */
209 static const struct case_source_class case_reader_source_class =
213 case_reader_source_read,
214 case_reader_source_destroy,
/* GET flavor: delegates to the parser shared with IMPORT. */
221 return parse_read_command (GET_CMD);
/* IMPORT flavor: same parser, which additionally accepts TYPE. */
228 return parse_read_command (IMPORT_CMD);
231 /* Writing system and portable files. */
233 /* Type of output file.  parse_write_command uses this to decide
   which format-specific subcommands are accepted and which writer
   is opened for an ordinary file. */
236 SYSFILE_WRITER, /* System file. */
237 PORFILE_WRITER /* Portable file. */
240 /* Type of a command.  Only PROC_CMD callers pass a RETAIN_UNSELECTED
   pointer to parse_write_command and may use UNSELECTED. */
243 XFORM_CMD, /* Transformation. */
244 PROC_CMD /* Procedure. */
247 /* File writer plus a case map.  Created by parse_write_command and
   released by case_writer_destroy. */
250 struct any_writer *writer; /* File writer. */
251 struct case_map *map; /* Map to output file dictionary
252 (null pointer for identity mapping). */
253 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
/* Closes AW's writer and releases its case map and bounce case.
   OK receives the result of any_writer_close(); callers such as
   output_trns_free use this function's result as an I/O status.
   NOTE(review): parse_write_command calls this on its failure path;
   confirm aw->writer and aw->map are initialized before any jump
   there. */
258 case_writer_destroy (struct case_writer *aw)
263 ok = any_writer_close (aw->writer);
264 destroy_case_map (aw->map);
265 case_destroy (&aw->bounce);
271 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
272 WRITER_TYPE identifies the type of file to write,
273 and COMMAND_TYPE identifies the type of command.
275 On success, returns a writer.
276 For procedures only, sets *RETAIN_UNSELECTED to true if cases
277 that would otherwise be excluded by FILTER or USE should be
   retained (the default), or to false if UNSELECTED=DELETE was given.
   RETAIN_UNSELECTED must be null exactly when COMMAND_TYPE is
   XFORM_CMD.
280 On failure, returns a null pointer. */
281 static struct case_writer *
282 parse_write_command (enum writer_type writer_type,
283 enum command_type command_type,
284 bool *retain_unselected)
287 struct file_handle *handle; /* Output file. */
288 struct dictionary *dict; /* Dictionary for output file. */
289 struct case_writer *aw; /* Writer. */
291 /* Common options. */
292 bool print_map; /* Print map? TODO. */
293 bool print_short_names; /* Print long-to-short name map. TODO. */
294 struct sfm_write_options sysfile_opts;
295 struct pfm_write_options porfile_opts;
297 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
298 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
299 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
/* Procedures keep unselected cases unless told otherwise. */
301 if (command_type == PROC_CMD)
302 *retain_unselected = true;
/* Work on a private copy of the active dictionary so that trimming
   does not affect default_dict. */
305 dict = dict_clone (default_dict);
306 aw = xmalloc (sizeof *aw);
309 case_nullify (&aw->bounce);
311 print_short_names = false;
312 sysfile_opts = sfm_writer_default_options ();
313 porfile_opts = pfm_writer_default_options ();
/* Start tracking value positions before anything is deleted, so that
   dropping scratch variables is reflected in the case map. */
315 start_case_map (dict);
316 dict_delete_scratch_vars (dict);
321 if (lex_match_id ("OUTFILE"))
325 lex_sbc_only_once ("OUTFILE");
331 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
335 else if (lex_match_id ("NAMES"))
336 print_short_names = true;
337 else if (lex_match_id ("PERMISSIONS"))
342 if (lex_match_id ("READONLY"))
344 else if (lex_match_id ("WRITEABLE"))
348 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
/* The permission setting is applied to both option sets. */
351 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
/* UNSELECTED is recognized only for procedures. */
353 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
356 if (lex_match_id ("RETAIN"))
357 *retain_unselected = true;
358 else if (lex_match_id ("DELETE"))
359 *retain_unselected = false;
362 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
/* System-file-only subcommands. */
366 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
367 sysfile_opts.compress = true;
368 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
369 sysfile_opts.compress = false;
370 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
373 if (!lex_force_int ())
375 sysfile_opts.version = lex_integer ();
/* Portable-file-only subcommands. */
378 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
381 if (lex_match_id ("COMMUNICATIONS"))
382 porfile_opts.type = PFM_COMM;
383 else if (lex_match_id ("TAPE"))
384 porfile_opts.type = PFM_TAPE;
/* NOTE(review): the message names "COMM" while the keyword matched
   above is "COMMUNICATIONS"; confirm the abbreviation is intended. */
387 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
391 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
394 if (!lex_force_int ())
396 porfile_opts.digits = lex_integer ();
/* Anything else must be a dictionary-trimming subcommand. */
399 else if (!parse_dict_trim (dict))
402 if (!lex_match ('/'))
405 if (lex_end_of_command () != CMD_SUCCESS)
410 lex_sbc_missing ("OUTFILE");
/* Compact the trimmed dictionary, then derive the map from the
   original value positions to the compacted ones. */
414 dict_compact_values (dict);
415 aw->map = finish_case_map (dict);
417 case_create (&aw->bounce, dict_get_next_value_idx (dict));
/* Ordinary files get a format-specific writer built from the parsed
   options; other handles are opened through any_writer_open(). */
419 if (fh_get_referent (handle) == FH_REF_FILE)
424 aw->writer = any_writer_from_sfm_writer (
425 sfm_open_writer (handle, dict, sysfile_opts));
428 aw->writer = any_writer_from_pfm_writer (
429 pfm_open_writer (handle, dict, porfile_opts));
434 aw->writer = any_writer_open (handle, dict);
/* Failure path: release the partially built writer. */
440 case_writer_destroy (aw);
445 /* Writes case C to writer AW and returns the result of
   any_writer_write().
   NOTE(review): map_case() asserts a non-null map, while struct
   case_writer allows a null map for identity mapping; confirm the
   call below is guarded and that, when a map exists, the mapped
   bounce case rather than the caller's case is what gets written. */
447 case_writer_write_case (struct case_writer *aw, struct ccase *c)
451 map_case (aw->map, c, &aw->bounce);
454 return any_writer_write (aw->writer, c);
457 /* SAVE and EXPORT. */
459 static bool output_proc (struct ccase *, void *);
461 /* Parses and performs the SAVE or EXPORT procedure.
   WRITER_TYPE selects the output format.  Returns CMD_SUCCESS only
   if the procedure ran without error and the output file was also
   closed without error; otherwise CMD_CASCADING_FAILURE. */
463 parse_output_proc (enum writer_type writer_type)
465 bool retain_unselected;
466 struct variable *saved_filter_variable;
467 struct case_writer *aw;
470 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
472 return CMD_CASCADING_FAILURE;
/* Temporarily clear the filter variable so that unselected cases are
   written too, then restore it once the procedure has run. */
474 saved_filter_variable = dict_get_filter (default_dict);
475 if (retain_unselected)
476 dict_set_filter (default_dict, NULL);
477 ok = procedure (output_proc, aw);
478 dict_set_filter (default_dict, saved_filter_variable);
/* Closing the writer can fail; fold that into the command's status
   instead of discarding it.  The destroy call is the left operand so
   that it always runs. */
480 ok = case_writer_destroy (aw) && ok;
481 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
484 /* Writes case C to file.  AW_ is the struct case_writer that
   parse_output_proc passes to procedure() as auxiliary data; the
   result of case_writer_write_case() is returned unchanged. */
486 output_proc (struct ccase *c, void *aw_)
488 struct case_writer *aw = aw_;
489 return case_writer_write_case (aw, c);
/* System-file flavor of the output procedure. */
495 return parse_output_proc (SYSFILE_WRITER);
/* Portable-file flavor of the output procedure. */
501 return parse_output_proc (PORFILE_WRITER);
504 /* XSAVE and XEXPORT. */
506 /* Transformation.  Allocated by parse_output_trns and handed to
   add_transformation() as the auxiliary data for output_trns_proc
   and output_trns_free. */
509 struct case_writer *aw; /* Writer. */
512 static trns_proc_func output_trns_proc;
513 static trns_free_func output_trns_free;
515 /* Parses the XSAVE or XEXPORT transformation command.
   WRITER_TYPE selects the output format.  On success the new
   struct output_trns is registered with add_transformation(). */
517 parse_output_trns (enum writer_type writer_type)
519 struct output_trns *t = xmalloc (sizeof *t);
/* Transformations have no RETAIN_UNSELECTED output, hence the null. */
520 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
/* NOTE(review): T was allocated above; confirm it is freed before
   this failure return. */
524 return CMD_CASCADING_FAILURE;
527 add_transformation (output_trns_proc, output_trns_free, t);
531 /* Writes case C to the system file specified on XSAVE or XEXPORT.
   Always returns TRNS_CONTINUE.
   NOTE(review): the boolean result of case_writer_write_case() is
   discarded, so a failed write does not stop the transformation
   chain here; confirm that is intended. */
533 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
535 struct output_trns *t = trns_;
536 case_writer_write_case (t->aw, c);
537 return TRNS_CONTINUE;
540 /* Frees an XSAVE or XEXPORT transformation.
541 Returns true if successful, false if an I/O error occurred.
   The status comes from case_writer_destroy(), which closes the
   output file. */
543 output_trns_free (void *trns_)
545 struct output_trns *t = trns_;
550 ok = case_writer_destroy (t->aw);
/* System-file flavor of the output transformation. */
560 return parse_output_trns (SYSFILE_WRITER);
563 /* XEXPORT command: portable-file flavor of the output
   transformation. */
567 return parse_output_trns (PORFILE_WRITER);
570 static bool rename_variables (struct dictionary *dict);
571 static bool drop_variables (struct dictionary *dict);
572 static bool keep_variables (struct dictionary *dict);
574 /* Commands that read and write system files share a great deal
575 of common syntactic structure for rearranging and dropping
576 variables. This function parses this syntax and modifies DICT
577 appropriately. Returns true on success, false on failure.
   Recognized keywords are MAP, DROP, KEEP and RENAME; anything else
   is reported with lex_error(). */
579 parse_dict_trim (struct dictionary *dict)
/* MAP is accepted here; its handling is not delegated to a helper. */
581 if (lex_match_id ("MAP"))
586 else if (lex_match_id ("DROP"))
587 return drop_variables (dict);
588 else if (lex_match_id ("KEEP"))
589 return keep_variables (dict);
590 else if (lex_match_id ("RENAME"))
591 return rename_variables (dict);
594 lex_error (_("expecting a valid subcommand"));
599 /* Parses and performs the RENAME subcommand of GET and SAVE.
   Two forms are handled: a single OLD=NEW pair, which is rejected if
   NEW already names a variable in DICT, and one or more parenthesized
   groups whose renames are collected and applied together with
   dict_rename_vars(). */
601 rename_variables (struct dictionary *dict)
/* Single-pair form. */
619 v = parse_dict_variable (dict);
622 if (!lex_force_match ('=')
625 if (dict_lookup_var (dict, tokid) != NULL)
627 msg (SE, _("Cannot rename %s as %s because there already exists "
628 "a variable named %s. To rename variables with "
629 "overlapping names, use a single RENAME subcommand "
630 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
631 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
635 dict_rename_var (dict, v, tokid);
/* Parenthesized form: PV_APPEND accumulates old and new names across
   groups into V and NEW_NAMES. */
644 while (lex_match ('('))
648 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
650 if (!lex_match ('='))
652 msg (SE, _("`=' expected after variable list."));
655 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* NOTE(review): both counts subtract old_nv and are printed with %d
   after a cast to unsigned; confirm old_nv is also the previous value
   of nn at this point. */
659 msg (SE, _("Number of variables on left side of `=' (%d) does not "
660 "match number of variables on right side (%d), in "
661 "parenthesized group %d of RENAME subcommand."),
662 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
665 if (!lex_force_match (')'))
/* Apply all collected renames at once, which allows names to be
   swapped within one subcommand. */
670 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
672 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
/* Per-name cleanup over the NN collected new names. */
678 for (i = 0; i < nn; i++)
686 /* Parses and performs the DROP subcommand of GET and SAVE.
687 Returns true if successful, false on failure.
   The listed variables are deleted from DICT; dropping every
   variable is reported as an error. */
689 drop_variables (struct dictionary *dict)
695 if (!parse_variables (dict, &v, &nv, PV_NONE))
697 dict_delete_vars (dict, v, nv);
/* An empty dictionary is not allowed as a result. */
700 if (dict_get_var_cnt (dict) == 0)
702 msg (SE, _("Cannot DROP all variables from dictionary."));
708 /* Parses and performs the KEEP subcommand of GET and SAVE.
709 Returns true if successful, false on failure.
   The listed variables are moved to the front of DICT in the order
   given and every other variable is deleted. */
711 keep_variables (struct dictionary *dict)
718 if (!parse_variables (dict, &v, &nv, PV_NONE))
721 /* Move the specified variables to the beginning. */
722 dict_reorder_vars (dict, v, nv);
724 /* Delete the remaining variables.  V is reused to hold the variables
   now at positions NV and beyond, i.e. the ones not listed. */
725 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
726 for (i = nv; i < dict_get_var_cnt (dict); i++)
727 v[i - nv] = dict_get_var (dict, i);
728 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* Kinds of MATCH FILES input, stored in struct mtf_file's TYPE
   member.  cmd_match_files keeps all MTF_FILE entries ahead of all
   MTF_TABLE entries in its list. */
739 MTF_FILE, /* Specified on FILE= subcommand. */
740 MTF_TABLE /* Specified on TABLE= subcommand. */
743 /* One of the files on MATCH FILES. */
746 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
747 struct mtf_file *next_min; /* Next in the chain of minimums; also links
   the chain of non-minimum files built by mtf_processing. */
749 int type; /* One of MTF_*. */
750 struct variable **by; /* List of BY variables for this file. */
751 struct file_handle *handle; /* File handle; null for the active file. */
752 struct any_reader *reader; /* File reader. */
753 struct dictionary *dict; /* Dictionary from system file, or default_dict
   for the active file. */
756 char *in_name; /* Variable name from the IN subcommand, or null. */
757 struct variable *in_var; /* Variable (in master dictionary). */
759 struct ccase input; /* Input record. */
762 /* MATCH FILES procedure. */
765 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
766 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
768 bool ok; /* False if I/O error occurs. */
770 size_t by_cnt; /* Number of variables on BY subcommand. */
772 /* Names of FIRST, LAST variables; an empty string means the
   subcommand was not given. */
773 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
775 struct dictionary *dict; /* Dictionary of output file. */
776 struct case_sink *sink; /* Sink to receive output. */
777 struct ccase mtf_case; /* Case used for output. */
779 unsigned seq_num; /* Sequence number of the output case being built;
   a variable has been stored for this case when its seq_nums entry
   equals this value. */
780 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
783 static bool mtf_free (struct mtf_proc *);
784 static bool mtf_close_file (struct mtf_file *);
785 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
786 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
788 static bool mtf_read_nonactive_records (void *);
789 static bool mtf_processing_finish (void *);
790 static bool mtf_processing (struct ccase *, void *);
792 static char *var_type_description (struct variable *);
794 static void set_master (struct variable *, struct variable *master);
795 static struct variable *get_master (struct variable *);
797 /* Parse and execute the MATCH FILES command. */
799 cmd_match_files (void)
802 struct mtf_file *first_table = NULL;
803 struct mtf_file *iter;
805 bool used_active_file = false;
806 bool saw_table = false;
811 mtf.head = mtf.tail = NULL;
815 mtf.dict = dict_create ();
817 case_nullify (&mtf.mtf_case);
820 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
824 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
826 struct mtf_file *file = xmalloc (sizeof *file);
828 if (lex_match_id ("FILE"))
829 file->type = MTF_FILE;
830 else if (lex_match_id ("TABLE"))
832 file->type = MTF_TABLE;
843 file->in_name = NULL;
845 case_nullify (&file->input);
847 /* FILEs go first, then TABLEs. */
848 if (file->type == MTF_TABLE || first_table == NULL)
851 file->prev = mtf.tail;
853 mtf.tail->next = file;
855 if (mtf.head == NULL)
857 if (file->type == MTF_TABLE && first_table == NULL)
862 assert (file->type == MTF_FILE);
863 file->next = first_table;
864 file->prev = first_table->prev;
865 if (first_table->prev)
866 first_table->prev->next = file;
869 first_table->prev = file;
877 if (used_active_file)
879 msg (SE, _("The active file may not be specified more "
883 used_active_file = true;
885 assert (pgm_state != STATE_INPUT);
886 if (pgm_state == STATE_INIT)
888 msg (SE, _("Cannot specify the active file since no active "
889 "file has been defined."));
896 _("MATCH FILES may not be used after TEMPORARY when "
897 "the active file is an input source. "
898 "Temporary transformations will be made permanent."));
902 file->dict = default_dict;
906 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
907 if (file->handle == NULL)
910 file->reader = any_reader_open (file->handle, &file->dict);
911 if (file->reader == NULL)
914 case_create (&file->input, dict_get_next_value_idx (file->dict));
917 while (lex_match ('/'))
918 if (lex_match_id ("RENAME"))
920 if (!rename_variables (file->dict))
923 else if (lex_match_id ("IN"))
932 if (file->in_name != NULL)
934 msg (SE, _("Multiple IN subcommands for a single FILE or "
938 file->in_name = xstrdup (tokid);
943 mtf_merge_dictionary (mtf.dict, file);
948 if (lex_match (T_BY))
950 struct variable **by;
954 msg (SE, _("BY may appear at most once."));
959 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
960 PV_NO_DUPLICATE | PV_NO_SCRATCH))
963 for (iter = mtf.head; iter != NULL; iter = iter->next)
967 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
969 for (i = 0; i < mtf.by_cnt; i++)
971 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
972 if (iter->by[i] == NULL)
974 msg (SE, _("File %s lacks BY variable %s."),
975 iter->handle ? fh_get_name (iter->handle) : "*",
984 else if (lex_match_id ("FIRST"))
986 if (mtf.first[0] != '\0')
988 msg (SE, _("FIRST may appear at most once."));
993 if (!lex_force_id ())
995 strcpy (mtf.first, tokid);
998 else if (lex_match_id ("LAST"))
1000 if (mtf.last[0] != '\0')
1002 msg (SE, _("LAST may appear at most once."));
1007 if (!lex_force_id ())
1009 strcpy (mtf.last, tokid);
1012 else if (lex_match_id ("MAP"))
1016 else if (lex_match_id ("DROP"))
1018 if (!drop_variables (mtf.dict))
1021 else if (lex_match_id ("KEEP"))
1023 if (!keep_variables (mtf.dict))
1032 if (!lex_match ('/') && token != '.')
1034 lex_end_of_command ();
1039 if (mtf.by_cnt == 0)
1043 msg (SE, _("BY is required when TABLE is specified."));
1048 msg (SE, _("BY is required when IN is specified."));
1053 /* Set up mapping from each file's variables to master
1055 for (iter = mtf.head; iter != NULL; iter = iter->next)
1057 struct dictionary *d = iter->dict;
1060 for (i = 0; i < dict_get_var_cnt (d); i++)
1062 struct variable *v = dict_get_var (d, i);
1063 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1069 /* Add IN variables to master dictionary. */
1070 for (iter = mtf.head; iter != NULL; iter = iter->next)
1071 if (iter->in_name != NULL)
1073 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1074 if (iter->in_var == NULL)
1076 msg (SE, _("IN variable name %s duplicates an "
1077 "existing variable name."),
1078 iter->in_var->name);
1081 iter->in_var->print = iter->in_var->write
1082 = make_output_format (FMT_F, 1, 0);
1085 /* MATCH FILES performs an n-way merge on all its input files.
1088 1. Read one input record from every input FILE.
1090 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1092 3. Find the FILE input record(s) that have minimum BY
1093 values. Store all the values from these input records into
1096 4. For every TABLE, read another record as long as the BY values
1097 on the TABLE's input record are less than the FILEs' BY values.
1098 If an exact match is found, store all the values from the TABLE
1099 input record into the output record.
1101 5. Write the output record.
1103 6. Read another record from each input file FILE and TABLE that
1104 we stored values from above. If we come to the end of one of the
1105 input files, remove it from the list of input files.
1107 7. Repeat from step 2.
1109 Unfortunately, this algorithm can't be implemented in a
1110 straightforward way because there's no function to read a
1111 record from the active file. Instead, it has to be written
1114 FIXME: For merging large numbers of files (more than 10?) a
1115 better algorithm would use a heap for finding minimum
1118 if (!used_active_file)
1119 discard_variables ();
1121 dict_compact_values (mtf.dict);
1122 mtf.sink = create_case_sink (&storage_sink_class, mtf.dict, NULL);
1123 if (mtf.sink->class->open != NULL)
1124 mtf.sink->class->open (mtf.sink);
1126 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1127 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1129 if (!mtf_read_nonactive_records (&mtf))
1132 if (used_active_file)
1133 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1135 ok = mtf_processing_finish (&mtf);
1137 free_case_source (vfm_source);
1140 dict_destroy (default_dict);
1141 default_dict = mtf.dict;
1143 vfm_source = mtf.sink->class->make_source (mtf.sink);
1144 free_case_sink (mtf.sink);
1146 if (!mtf_free (&mtf))
1148 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1152 return CMD_CASCADING_FAILURE;
1155 /* Repeats 2...7 an arbitrary number of times.
   Called once no more active-file cases will arrive: the active file
   is dropped from the list and mtf_processing is run with a null case
   until no FILE inputs remain at the head of the list. */
1157 mtf_processing_finish (void *mtf_)
1159 struct mtf_proc *mtf = mtf_;
1160 struct mtf_file *iter;
1162 /* Find the active file and delete it.  The active file is the entry
   whose handle is null. */
1163 for (iter = mtf->head; iter; iter = iter->next)
1164 if (iter->handle == NULL)
1166 if (!mtf_delete_file_in_place (mtf, &iter))
/* FILEs precede TABLEs, so a FILE at the head means work remains. */
1171 while (mtf->head && mtf->head->type == MTF_FILE)
1172 if (!mtf_processing (NULL, mtf))
1178 /* Return a string in a static buffer describing V's variable type and
width.  Two static buffers are declared, presumably so that two
results can be used in a single message, as mtf_merge_dictionary
does; the selection logic is not shown here. */
1181 var_type_description (struct variable *v)
1183 static char buf[2][32];
1190 if (v->type == NUMERIC)
1191 strcpy (s, "numeric");
1194 assert (v->type == ALPHA);
1195 sprintf (s, "string with width %d", v->width);
1200 /* Closes FILE and frees its associated data.
1201 Returns true if successful, false if an I/O error
1202 occurred on FILE. */
1204 mtf_close_file (struct mtf_file *file)
/* Sample the error state before the reader is closed. */
1206 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1208 any_reader_close (file->reader);
/* Only files with a handle own their dictionary; the active file's
   dictionary is default_dict and must not be destroyed here. */
1209 if (file->handle != NULL)
1210 dict_destroy (file->dict);
1211 case_destroy (&file->input);
1212 free (file->in_name);
1217 /* Free all the data for the MATCH FILES procedure.
1218 Returns true if successful, false if an I/O error
1221 mtf_free (struct mtf_proc *mtf)
1223 struct mtf_file *iter, *next;
1226 for (iter = mtf->head; iter; iter = next)
1229 assert (iter->dict != mtf->dict);
1230 if (!mtf_close_file (iter))
1235 dict_destroy (mtf->dict);
1236 case_destroy (&mtf->mtf_case);
1237 free (mtf->seq_nums);
1242 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1243 file in the chain, or to NULL if it was the last in the chain.
1244 Returns true if successful, false if an I/O error occurred.
   Before the file is closed, its IN variable and every output
   variable it feeds are reset in the output case: 0 for the IN
   variable and spaces for string variables. */
1246 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1248 struct mtf_file *f = *file;
/* Unlink F from the doubly linked list, fixing head and tail. */
1252 f->prev->next = f->next;
1254 f->next->prev = f->prev;
1256 mtf->head = f->next;
1258 mtf->tail = f->prev;
1261 if (f->in_var != NULL)
1262 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1263 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1265 struct variable *v = dict_get_var (f->dict, i);
1266 struct variable *mv = get_master (v);
1269 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1271 if (v->type == NUMERIC)
1274 memset (out->s, ' ', v->width);
1278 return mtf_close_file (f);
1281 /* Read a record from every input file except the active file.
1282 Returns true if successful, false if an I/O error occurred.
   A file that yields no record is removed from the list. */
1284 mtf_read_nonactive_records (void *mtf_)
1286 struct mtf_proc *mtf = mtf_;
1287 struct mtf_file *iter, *next;
1290 for (iter = mtf->head; ok && iter != NULL; iter = next)
/* A null handle marks the active file, which is skipped here. */
1293 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1294 if (!mtf_delete_file_in_place (mtf, &iter))
1300 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1301 if A == B, 1 if A > B.
   A file whose own input case is null takes its values from C
   instead.
   NOTE(review): the assertion below runs after A and B have already
   been dereferenced, so it cannot guard against a null A or B. */
1303 mtf_compare_BY_values (struct mtf_proc *mtf,
1304 struct mtf_file *a, struct mtf_file *b,
1307 struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1308 struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1309 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1310 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1313 /* Perform one iteration of steps 3...7 above.
1314 Returns true if successful, false if an I/O error occurred.
   C is the current active-file case, or null when called from
   mtf_processing_finish.  The loop repeats until another active-file
   case is needed or no FILE input remains at the head of the list. */
1316 mtf_processing (struct ccase *c, void *mtf_)
1318 struct mtf_proc *mtf = mtf_;
1320 /* Do we need another record from the active file? */
1321 bool read_active_file;
1323 assert (mtf->head != NULL);
1324 if (mtf->head->type == MTF_TABLE)
1329 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1330 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1331 struct mtf_file *iter, *next;
1333 read_active_file = false;
1335 /* 3. Find the FILE input record(s) that have minimum BY
1336 values. Store all the values from these input records into
1337 the output record. */
1338 min_head = min_tail = mtf->head;
1339 max_head = max_tail = NULL;
1340 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1343 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
/* ITER is above the minimum: append it to the max chain. */
1347 max_tail = max_tail->next_min = iter;
1349 max_head = max_tail = iter;
/* ITER ties the minimum: append it to the min chain. */
1352 min_tail = min_tail->next_min = iter;
/* ITER is a new minimum: the old min chain joins the max chain and
   ITER starts a fresh min chain. */
1357 max_tail->next_min = min_head;
1358 max_tail = min_tail;
1362 max_head = min_head;
1363 max_tail = min_tail;
1365 min_head = min_tail = iter;
1369 /* 4. For every TABLE, read another record as long as the BY
1370 values on the TABLE's input record are less than the FILEs'
1371 BY values. If an exact match is found, store all the values
1372 from the TABLE input record into the output record. */
1373 for (; iter != NULL; iter = next)
1375 assert (iter->type == MTF_TABLE);
1380 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1384 max_tail = max_tail->next_min = iter;
1386 max_head = max_tail = iter;
1389 min_tail = min_tail->next_min = iter;
1392 if (iter->handle == NULL)
1394 if (any_reader_read (iter->reader, &iter->input))
1396 if (!mtf_delete_file_in_place (mtf, &iter))
1403 /* Next sequence number. */
1406 /* Store data to all the records we are using. */
1408 min_tail->next_min = NULL;
1409 for (iter = min_head; iter; iter = iter->next_min)
1413 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1415 struct variable *v = dict_get_var (iter->dict, i);
1416 struct variable *mv = get_master (v);
/* The sequence-number check makes the first file that supplies a
   variable win for this output case. */
1418 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1420 struct ccase *record
1421 = case_is_null (&iter->input) ? c : &iter->input;
1422 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1424 mtf->seq_nums[mv->index] = mtf->seq_num;
1425 if (v->type == NUMERIC)
1426 out->f = case_num (record, v->fv);
1428 memcpy (out->s, case_str (record, v->fv), v->width);
1431 if (iter->in_var != NULL)
1432 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1434 if (iter->type == MTF_FILE && iter->handle == NULL)
1435 read_active_file = true;
1438 /* Store missing values to all the records we're not using. */
1441 max_tail->next_min = NULL;
1442 for (iter = max_head; iter; iter = iter->next_min)
1446 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1448 struct variable *v = dict_get_var (iter->dict, i);
1449 struct variable *mv = get_master (v);
1451 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1453 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1454 mtf->seq_nums[mv->index] = mtf->seq_num;
1456 if (v->type == NUMERIC)
1459 memset (out->s, ' ', v->width);
1462 if (iter->in_var != NULL)
1463 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1466 /* 5. Write the output record. */
1467 mtf->sink->class->write (mtf->sink, &mtf->mtf_case);
1469 /* 6. Read another record from each input file FILE and TABLE
1470 that we stored values from above. If we come to the end of
1471 one of the input files, remove it from the list of input files. */
1473 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
/* Fetch the successor first because ITER may be deleted below. */
1475 next = iter->next_min;
1476 if (iter->reader != NULL
1477 && !any_reader_read (iter->reader, &iter->input))
1478 if (!mtf_delete_file_in_place (mtf, &iter))
1482 while (!read_active_file
1483 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1488 /* Merge the dictionary for file F into master dictionary M.
   M takes F's file label if it has none yet, F's documents are
   appended to M's, and each non-scratch variable of F is either
   cloned into M or checked against the variable of the same name
   already there.  A width mismatch is reported with msg(). */
1490 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1492 struct dictionary *d = f->dict;
1493 const char *d_docs, *m_docs;
1496 if (dict_get_label (m) == NULL)
1497 dict_set_label (m, dict_get_label (d));
1499 d_docs = dict_get_documents (d);
1500 m_docs = dict_get_documents (m);
1504 dict_set_documents (m, d_docs);
/* Both dictionaries have documents: concatenate M's then D's. */
1510 new_len = strlen (m_docs) + strlen (d_docs);
1511 new_docs = xmalloc (new_len + 1);
1512 strcpy (new_docs, m_docs);
1513 strcat (new_docs, d_docs);
1514 dict_set_documents (m, new_docs);
1519 for (i = 0; i < dict_get_var_cnt (d); i++)
1521 struct variable *dv = dict_get_var (d, i);
1522 struct variable *mv = dict_lookup_var (m, dv->name);
1524 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1529 if (mv->width != dv->width)
/* The active file has a null handle; name it "*" as cmd_match_files
   does, instead of passing the null handle to fh_get_name(). */
1531 msg (SE, _("Variable %s in file %s (%s) has different "
1532 "type or width from the same variable in "
1533 "earlier file (%s)."),
1534 dv->name, f->handle ? fh_get_name (f->handle) : "*",
1535 var_type_description (dv), var_type_description (mv));
/* For matching widths, fill in value labels, missing values and the
   variable label only where the master has none of its own. */
1539 if (dv->width == mv->width)
1541 if (val_labs_count (dv->val_labs)
1542 && !val_labs_count (mv->val_labs))
/* NOTE(review): the master's previous (empty) val_labs pointer is
   overwritten here; confirm it does not need to be released first. */
1543 mv->val_labs = val_labs_copy (dv->val_labs);
1544 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1545 mv_copy (&mv->miss, &dv->miss);
1548 if (dv->label && !mv->label)
1549 mv->label = xstrdup (dv->label);
/* First occurrence of this name: clone the variable into M. */
1552 mv = dict_clone_var_assert (m, dv, dv->name);
1558 /* Marks V's master variable as MASTER.  The pointer is stored in V's
   aux data with no destructor, so V does not own MASTER. */
1560 set_master (struct variable *v, struct variable *master)
1562 var_attach_aux (v, master, NULL);
1565 /* Returns the master variable corresponding to V,
1566 as set with set_master().  Callers in this file check the result
   against null before using it. */
1567 static struct variable *
1568 get_master (struct variable *v)
1577 /* A case map copies data from a case that corresponds to one
1578 dictionary to a case that corresponds to a second dictionary
1579 derived from the first by, optionally, deleting, reordering,
1580 or renaming variables. (No new variables may be created.)
   A map entry of -1 marks a destination slot that has no source
   value; finish_case_map trims trailing entries of that kind. */
1586 size_t value_cnt; /* Number of values in map. */
1587 int *map; /* For each destination index, the
1588 corresponding source index. */
1591 /* Prepares dictionary D for producing a case map. Afterward,
1592 the caller may delete, reorder, or rename variables within D
1593 at will before using finish_case_map() to produce the case
   map.
1596 Uses D's aux members, which must otherwise not be in use. */
1598 start_case_map (struct dictionary *d)
1600 size_t var_cnt = dict_get_var_cnt (d);
1603 for (i = 0; i < var_cnt; i++)
1605 struct variable *v = dict_get_var (d, i);
/* Each variable gets a heap-allocated int as aux data, freed by
   var_dtor_free; finish_case_map reads it back as the variable's
   source value index. */
1606 int *src_fv = xmalloc (sizeof *src_fv);
1608 var_attach_aux (v, src_fv, var_dtor_free);
1612 /* Produces a case map from dictionary D, which must have been
1613 previously prepared with start_case_map().
1615 Does not retain any reference to D, and clears the aux members
1616 set up by start_case_map().
1618 Returns the new case map, or a null pointer if no mapping is
1619 required (that is, no data has changed position). */
1620 static struct case_map *
1621 finish_case_map (struct dictionary *d)
1623 struct case_map *map;
1624 size_t var_cnt = dict_get_var_cnt (d);
/* One map entry per value slot in D's current layout. */
1628 map = xmalloc (sizeof *map);
1629 map->value_cnt = dict_get_next_value_idx (d);
1630 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1631 for (i = 0; i < map->value_cnt; i++)
1635 for (i = 0; i < var_cnt; i++)
1637 struct variable *v = dict_get_var (d, i);
/* Take back the source index recorded by start_case_map. */
1638 int *src_fv = (int *) var_detach_aux (v);
/* A variable whose first value index changed means the map is
   needed. */
1641 if (v->fv != *src_fv)
/* Map each of the variable's NV value slots from its old position to
   its new one; every destination slot is written at most once. */
1644 for (idx = 0; idx < v->nv; idx++)
1646 int src_idx = *src_fv + idx;
1647 int dst_idx = v->fv + idx;
1649 assert (map->map[dst_idx] == -1);
1650 map->map[dst_idx] = src_idx;
/* No mapping needed: discard the map. */
1657 destroy_case_map (map);
/* Trim trailing destination slots that have no source. */
1661 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1667 /* Maps from SRC to DST, applying case map MAP.
   MAP, SRC and DST must all be non-null, and SRC and DST must be
   distinct cases.  For each destination index covered by MAP, the
   value at the mapped source index is copied into DST. */
1669 map_case (const struct case_map *map,
1670 const struct ccase *src, struct ccase *dst)
1674 assert (map != NULL);
1675 assert (src != NULL);
1676 assert (dst != NULL);
1677 assert (src != dst);
1679 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1681 int src_idx = map->map[dst_idx];
1683 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1687 /* Destroys case map MAP. */
1689 destroy_case_map (struct case_map *map)