1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/por-file-writer.h>
32 #include <data/procedure.h>
33 #include <data/settings.h>
34 #include <data/storage-stream.h>
35 #include <data/sys-file-writer.h>
36 #include <data/transformations.h>
37 #include <data/value-labels.h>
38 #include <data/variable.h>
39 #include <language/command.h>
40 #include <language/data-io/file-handle.h>
41 #include <language/lexer/lexer.h>
42 #include <language/lexer/variable-parser.h>
43 #include <libpspp/alloc.h>
44 #include <libpspp/compiler.h>
45 #include <libpspp/hash.h>
46 #include <libpspp/message.h>
47 #include <libpspp/message.h>
48 #include <libpspp/misc.h>
49 #include <libpspp/str.h>
52 #define _(msgid) gettext (msgid)
/* Forward declarations for the case-map helpers (start, finish,
   apply, destroy) and for the dictionary-trimming parser that the
   reading and writing commands below share. */
54 /* Rearranging and reducing a dictionary. */
55 static void start_case_map (struct dictionary *);
56 static struct case_map *finish_case_map (struct dictionary *);
57 static void map_case (const struct case_map *,
58 const struct ccase *, struct ccase *);
59 static void destroy_case_map (struct case_map *);
61 static bool parse_dict_trim (struct dictionary *);
/* State for the input program installed by parse_read_command():
   a file reader, a case map, and a bounce-buffer case.
   NOTE(review): the declaration introduced by the "Type of command"
   comment is not visible in this listing; parse_read_command()
   takes an `enum reader_command' and is called with GET_CMD and
   IMPORT_CMD further down. */
63 /* Reading system and portable files. */
65 /* Type of command. */
72 /* Case reader input program. */
73 struct case_reader_pgm
75 struct any_reader *reader; /* File reader. */
76 struct case_map *map; /* Map from file dict to active file dict. */
77 struct ccase bounce; /* Bounce buffer. */
80 static const struct case_source_class case_reader_source_class;
82 static void case_reader_pgm_free (struct case_reader_pgm *);
/* Shared implementation of the GET and IMPORT commands; TYPE says
   which one is being parsed.

   Visible steps: a FILE subcommand (or a bare string token) yields
   a file handle through fh_parse(); for IMPORT_CMD only, a TYPE
   subcommand accepts COMM or TAPE; a missing FILE is reported with
   lex_sbc_missing().  The current variables are then discarded, a
   case_reader_pgm is allocated and its reader opened, the bounce
   case is sized from the file dictionary, and the dictionary is
   trimmed between start_case_map() and finish_case_map().  Finally
   a case source wrapping PGM is installed.  The failure path frees
   PGM and returns CMD_CASCADING_FAILURE.

   NOTE(review): several lines of this function (braces, loop
   header, jumps, labels, the success return) are not visible in
   this listing; confirm the exact control flow against the full
   file. */
84 /* Parses a GET or IMPORT command. */
86 parse_read_command (enum reader_command type)
88 struct case_reader_pgm *pgm = NULL;
89 struct file_handle *fh = NULL;
90 struct dictionary *dict = NULL;
96 if (lex_match_id ("FILE") || token == T_STRING)
100 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
104 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
108 if (lex_match_id ("COMM"))
110 else if (lex_match_id ("TAPE"))
114 lex_error (_("expecting COMM or TAPE"));
124 lex_sbc_missing ("FILE");
128 discard_variables ();
/* PGM owns the reader, the case map and the bounce case from here
   on; case_reader_pgm_free() releases all three. */
130 pgm = xmalloc (sizeof *pgm);
131 pgm->reader = any_reader_open (fh, &dict);
133 case_nullify (&pgm->bounce);
134 if (pgm->reader == NULL)
137 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
139 start_case_map (dict);
144 if (!parse_dict_trim (dict))
148 pgm->map = finish_case_map (dict);
150 dict_destroy (default_dict);
153 proc_set_source (create_case_source (&case_reader_source_class, pgm));
158 case_reader_pgm_free (pgm);
161 return CMD_CASCADING_FAILURE;
/* Releases the resources held by PGM: closes the reader, destroys
   the case map, and destroys the bounce case.
   NOTE(review): any null check on PGM and the release of PGM itself
   are not visible in this listing. */
164 /* Frees a struct case_reader_pgm. */
166 case_reader_pgm_free (struct case_reader_pgm *pgm)
170 any_reader_close (pgm->reader);
171 destroy_case_map (pgm->map);
172 case_destroy (&pgm->bounce);
/* Destroy callback of case_reader_source_class: recovers the
   case_reader_pgm stored in SOURCE->aux and hands it to
   case_reader_pgm_free(). */
177 /* Clears internal state related to case reader input procedure. */
179 case_reader_source_destroy (struct case_source *source)
181 struct case_reader_pgm *pgm = source->aux;
182 case_reader_pgm_free (pgm);
/* Read callback of case_reader_source_class.  The program state is
   taken from SOURCE->aux.  Each case is passed to WRITE_CASE with
   WC_DATA; the final result also requires that the reader reports
   no error.
   NOTE(review): the declaration of C and the enclosing loop are not
   visible in this listing. */
185 /* Reads all the cases from the data file into C and passes them
186 to WRITE_CASE one by one, passing WC_DATA.
187 Returns true if successful, false if an I/O error occurred. */
189 case_reader_source_read (struct case_source *source,
191 write_case_func *write_case, write_case_data wc_data)
193 struct case_reader_pgm *pgm = source->aux;
/* Without a case map the reader fills C directly; with one, the
   case is read into the bounce buffer and then mapped into C. */
199 if (pgm->map == NULL)
200 got_case = any_reader_read (pgm->reader, c);
203 got_case = any_reader_read (pgm->reader, &pgm->bounce);
205 map_case (pgm->map, &pgm->bounce, c);
210 ok = write_case (wc_data);
214 return ok && !any_reader_error (pgm->reader);
/* Case source class used by parse_read_command(); it lists the read
   and destroy callbacks defined above.
   NOTE(review): any other initializer entries are not visible in
   this listing. */
217 static const struct case_source_class case_reader_source_class =
221 case_reader_source_read,
222 case_reader_source_destroy,
/* NOTE(review): presumably the bodies of the GET and IMPORT command
   entry points; their signatures are not visible in this listing.
   Each simply delegates to parse_read_command() with the matching
   command type. */
229 return parse_read_command (GET_CMD);
236 return parse_read_command (IMPORT_CMD);
/* Enumerators used by the writing commands.  parse_write_command()
   below receives them as `enum writer_type' and `enum command_type'.
   NOTE(review): the enum header lines are not visible in this
   listing. */
239 /* Writing system and portable files. */
241 /* Type of output file. */
244 SYSFILE_WRITER, /* System file. */
245 PORFILE_WRITER /* Portable file. */
248 /* Type of a command. */
251 XFORM_CMD, /* Transformation. */
252 PROC_CMD /* Procedure. */
/* Members of the writer object that parse_write_command() returns
   and that case_writer_write_case() and case_writer_destroy()
   operate on (both take a `struct case_writer *').
   NOTE(review): the struct header line is not visible in this
   listing. */
255 /* File writer plus a case map. */
258 struct any_writer *writer; /* File writer. */
259 struct case_map *map; /* Map to output file dictionary
260 (null pointer for identity mapping). */
261 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
/* Tears down writer AW: closes the underlying any_writer (keeping
   its status in `ok'), destroys the case map, and destroys the
   bounce case.  Callers use the result as a success flag.
   NOTE(review): any null check on AW, the release of AW itself and
   the return statement are not visible in this listing. */
266 case_writer_destroy (struct case_writer *aw)
271 ok = any_writer_close (aw->writer);
272 destroy_case_map (aw->map);
273 case_destroy (&aw->bounce);
/* Subcommands recognized by the visible code:
     OUTFILE (only once; parsed with fh_parse()), NAMES,
     PERMISSIONS with READONLY or WRITEABLE,
     UNSELECTED with RETAIN or DELETE (PROC_CMD only),
     COMPRESSED, UNCOMPRESSED, VERSION (SYSFILE_WRITER only),
     TYPE with COMMUNICATIONS or TAPE and DIGITS (PORFILE_WRITER
     only); anything else is handed to parse_dict_trim().
   The output dictionary is a clone of default_dict with scratch
   variables deleted; it is compacted before the case map is
   finished and the bounce case created.

   NOTE(review): the TYPE error message names "COMM" while the
   keyword matched just above it is "COMMUNICATIONS"; confirm which
   spelling is intended.
   NOTE(review): print_map and print_short_names are marked TODO and
   only print_short_names is assigned in the visible lines.
   NOTE(review): many short lines (braces, jumps, labels, returns)
   are not visible in this listing. */
279 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
280 WRITER_TYPE identifies the type of file to write,
281 and COMMAND_TYPE identifies the type of command.
283 On success, returns a writer.
284 For procedures only, sets *RETAIN_UNSELECTED to true if cases
285 that would otherwise be excluded by FILTER or USE should be
288 On failure, returns a null pointer. */
289 static struct case_writer *
290 parse_write_command (enum writer_type writer_type,
291 enum command_type command_type,
292 bool *retain_unselected)
295 struct file_handle *handle; /* Output file. */
296 struct dictionary *dict; /* Dictionary for output file. */
297 struct case_writer *aw; /* Writer. */
299 /* Common options. */
300 bool print_map; /* Print map? TODO. */
301 bool print_short_names; /* Print long-to-short name map. TODO. */
302 struct sfm_write_options sysfile_opts;
303 struct pfm_write_options porfile_opts;
/* RETAIN_UNSELECTED must be supplied exactly when parsing a
   procedure. */
305 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
306 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
307 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
309 if (command_type == PROC_CMD)
310 *retain_unselected = true;
313 dict = dict_clone (default_dict);
314 aw = xmalloc (sizeof *aw);
317 case_nullify (&aw->bounce);
319 print_short_names = false;
320 sysfile_opts = sfm_writer_default_options ();
321 porfile_opts = pfm_writer_default_options ();
323 start_case_map (dict);
324 dict_delete_scratch_vars (dict);
329 if (lex_match_id ("OUTFILE"))
333 lex_sbc_only_once ("OUTFILE");
339 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
343 else if (lex_match_id ("NAMES"))
344 print_short_names = true;
345 else if (lex_match_id ("PERMISSIONS"))
350 if (lex_match_id ("READONLY"))
352 else if (lex_match_id ("WRITEABLE"))
356 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
359 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
361 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
364 if (lex_match_id ("RETAIN"))
365 *retain_unselected = true;
366 else if (lex_match_id ("DELETE"))
367 *retain_unselected = false;
370 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
374 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
375 sysfile_opts.compress = true;
376 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
377 sysfile_opts.compress = false;
378 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
381 if (!lex_force_int ())
383 sysfile_opts.version = lex_integer ();
386 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
389 if (lex_match_id ("COMMUNICATIONS"))
390 porfile_opts.type = PFM_COMM;
391 else if (lex_match_id ("TAPE"))
392 porfile_opts.type = PFM_TAPE;
395 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
399 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
402 if (!lex_force_int ())
404 porfile_opts.digits = lex_integer ();
407 else if (!parse_dict_trim (dict))
410 if (!lex_match ('/'))
413 if (lex_end_of_command () != CMD_SUCCESS)
418 lex_sbc_missing ("OUTFILE");
422 dict_compact_values (dict);
423 aw->map = finish_case_map (dict);
425 case_create (&aw->bounce, dict_get_next_value_idx (dict));
/* File handles get a system-file or portable-file writer built with
   the parsed options; other referents go through any_writer_open(). */
427 if (fh_get_referent (handle) == FH_REF_FILE)
432 aw->writer = any_writer_from_sfm_writer (
433 sfm_open_writer (handle, dict, sysfile_opts));
436 aw->writer = any_writer_from_pfm_writer (
437 pfm_open_writer (handle, dict, porfile_opts));
442 aw->writer = any_writer_open (handle, dict);
448 case_writer_destroy (aw);
/* Maps C into AW's bounce buffer with map_case() and passes a case
   to any_writer_write(), whose result is returned.
   NOTE(review): the condition guarding the mapping and the statement
   that redirects C to the bounce buffer are not visible in this
   listing. */
453 /* Writes case C to writer AW. */
455 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
459 map_case (aw->map, c, &aw->bounce);
462 return any_writer_write (aw->writer, c);
/* Runs SAVE or EXPORT as a procedure: parses the command with
   parse_write_command() in PROC_CMD mode, runs procedure() with
   output_proc() as the per-case callback, and destroys the writer.
   While the procedure runs, default_dict's filter variable is
   cleared if unselected cases are to be retained; the saved filter
   is put back afterwards in either case.
   NOTE(review): the result of case_writer_destroy() is not used in
   the visible lines; lines are missing around it, so confirm
   whether the close status is folded into OK. */
465 /* SAVE and EXPORT. */
467 static bool output_proc (const struct ccase *, void *);
469 /* Parses and performs the SAVE or EXPORT procedure. */
471 parse_output_proc (enum writer_type writer_type)
473 bool retain_unselected;
474 struct variable *saved_filter_variable;
475 struct case_writer *aw;
478 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
480 return CMD_CASCADING_FAILURE;
482 saved_filter_variable = dict_get_filter (default_dict);
483 if (retain_unselected)
484 dict_set_filter (default_dict, NULL);
485 ok = procedure (output_proc, aw);
486 dict_set_filter (default_dict, saved_filter_variable);
488 case_writer_destroy (aw);
489 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Per-case callback given to procedure() by parse_output_proc().
   AW_ is the case_writer; the result of case_writer_write_case()
   is returned unchanged. */
492 /* Writes case C to file. */
494 output_proc (const struct ccase *c, void *aw_)
496 struct case_writer *aw = aw_;
497 return case_writer_write_case (aw, c);
/* NOTE(review): presumably the bodies of the SAVE and EXPORT command
   entry points; their signatures are not visible in this listing.
   Each delegates to parse_output_proc() with the writer type. */
503 return parse_output_proc (SYSFILE_WRITER);
509 return parse_output_proc (PORFILE_WRITER);
/* State of an XSAVE or XEXPORT transformation (a single case_writer)
   plus the prototypes of its process and free callbacks.
   NOTE(review): the struct header line is not visible in this
   listing; parse_output_trns() allocates a `struct output_trns'. */
512 /* XSAVE and XEXPORT. */
514 /* Transformation. */
517 struct case_writer *aw; /* Writer. */
520 static trns_proc_func output_trns_proc;
521 static trns_free_func output_trns_free;
/* Allocates the transformation state, parses the command with
   parse_write_command() in XFORM_CMD mode (so no RETAIN_UNSELECTED
   pointer is passed), and registers the transformation with
   add_transformation().  Returns CMD_CASCADING_FAILURE on the
   failure path.
   NOTE(review): the failure test, the release of T on failure and
   the success return are not visible in this listing. */
523 /* Parses the XSAVE or XEXPORT transformation command. */
525 parse_output_trns (enum writer_type writer_type)
527 struct output_trns *t = xmalloc (sizeof *t);
528 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
532 return CMD_CASCADING_FAILURE;
535 add_transformation (output_trns_proc, output_trns_free, t);
/* Transformation callback: writes C through the writer stored in
   TRNS_ and always returns TRNS_CONTINUE.  CASE_NUM is unused.
   NOTE(review): the result of case_writer_write_case() is discarded
   here, so a write failure is not reported from this callback;
   confirm that this is intended. */
539 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
541 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
543 struct output_trns *t = trns_;
544 case_writer_write_case (t->aw, c);
545 return TRNS_CONTINUE;
/* Free callback: destroys the writer held by the transformation
   state TRNS_ and keeps case_writer_destroy()'s status in `ok'.
   NOTE(review): any null check, the release of T and the return
   statement are not visible in this listing. */
548 /* Frees an XSAVE or XEXPORT transformation.
549 Returns true if successful, false if an I/O error occurred. */
551 output_trns_free (void *trns_)
553 struct output_trns *t = trns_;
558 ok = case_writer_destroy (t->aw);
/* NOTE(review): presumably the bodies of the XSAVE and XEXPORT
   command entry points; their signatures are not visible in this
   listing.  Each delegates to parse_output_trns(). */
568 return parse_output_trns (SYSFILE_WRITER);
571 /* XEXPORT command. */
575 return parse_output_trns (PORFILE_WRITER);
/* Dispatches on the next identifier: DROP, KEEP and RENAME are
   forwarded to drop_variables(), keep_variables() and
   rename_variables(); any other input produces the "expecting a
   valid subcommand" error.
   NOTE(review): the handling of MAP and the final returns are not
   visible in this listing. */
578 static bool rename_variables (struct dictionary *dict);
579 static bool drop_variables (struct dictionary *dict);
580 static bool keep_variables (struct dictionary *dict);
582 /* Commands that read and write system files share a great deal
583 of common syntactic structure for rearranging and dropping
584 variables. This function parses this syntax and modifies DICT
585 appropriately. Returns true on success, false on failure. */
587 parse_dict_trim (struct dictionary *dict)
589 if (lex_match_id ("MAP"))
594 else if (lex_match_id ("DROP"))
595 return drop_variables (dict);
596 else if (lex_match_id ("KEEP"))
597 return keep_variables (dict);
598 else if (lex_match_id ("RENAME"))
599 return rename_variables (dict);
602 lex_error (_("expecting a valid subcommand"));
/* Two forms are visible.  The first renames a single variable
   parsed with parse_dict_variable() after an `=', and refuses if
   the new name already exists in DICT.  The second handles one or
   more parenthesized groups: old variables and new names are
   accumulated across groups (PV_APPEND), the counts are compared
   per group, and all renames are applied together with
   dict_rename_vars().

   NOTE(review): the count-mismatch message uses %d three times but
   is given two values cast to unsigned; the right-hand count is
   computed as nn - old_nv.  Confirm both against the full file.
   NOTE(review): declarations, braces, jumps and the cleanup code
   are not visible in this listing. */
607 /* Parses and performs the RENAME subcommand of GET and SAVE. */
609 rename_variables (struct dictionary *dict)
627 v = parse_dict_variable (dict);
630 if (!lex_force_match ('=')
633 if (dict_lookup_var (dict, tokid) != NULL)
635 msg (SE, _("Cannot rename %s as %s because there already exists "
636 "a variable named %s. To rename variables with "
637 "overlapping names, use a single RENAME subcommand "
638 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
639 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
643 dict_rename_var (dict, v, tokid);
652 while (lex_match ('('))
656 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
658 if (!lex_match ('='))
660 msg (SE, _("`=' expected after variable list."));
663 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
667 msg (SE, _("Number of variables on left side of `=' (%d) does not "
668 "match number of variables on right side (%d), in "
669 "parenthesized group %d of RENAME subcommand."),
670 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
673 if (!lex_force_match (')'))
678 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
680 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
686 for (i = 0; i < nn; i++)
/* Parses a variable list from DICT, deletes those variables, and
   reports an error if DICT is left with no variables.
   NOTE(review): declarations, the release of the parsed list and
   the returns are not visible in this listing. */
694 /* Parses and performs the DROP subcommand of GET and SAVE.
695 Returns true if successful, false on failure.*/
697 drop_variables (struct dictionary *dict)
703 if (!parse_variables (dict, &v, &nv, PV_NONE))
705 dict_delete_vars (dict, v, nv);
708 if (dict_get_var_cnt (dict) == 0)
710 msg (SE, _("Cannot DROP all variables from dictionary."));
/* Parses a variable list from DICT, moves those NV variables to the
   front with dict_reorder_vars(), then reuses the array V to
   collect every variable from index NV onward and deletes them.
   NOTE(review): declarations, the release of V and the returns are
   not visible in this listing. */
716 /* Parses and performs the KEEP subcommand of GET and SAVE.
717 Returns true if successful, false on failure.*/
719 keep_variables (struct dictionary *dict)
726 if (!parse_variables (dict, &v, &nv, PV_NONE))
729 /* Move the specified variables to the beginning. */
730 dict_reorder_vars (dict, v, nv);
732 /* Delete the remaining variables. */
733 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
734 for (i = nv; i < dict_get_var_cnt (dict); i++)
735 v[i - nv] = dict_get_var (dict, i);
736 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* Kinds of input file for MATCH FILES; stored in the `type' member
   of struct mtf_file.
   NOTE(review): the enclosing enum header is not visible in this
   listing. */
747 MTF_FILE, /* Specified on FILE= subcommand. */
748 MTF_TABLE /* Specified on TABLE= subcommand. */
/* Per-input state for MATCH FILES.  Files form a doubly linked list
   (next/prev); next_min threads the temporary chains built by
   mtf_processing().  Elsewhere in this file a null `handle' is used
   to recognize the active file.
   NOTE(review): the struct header line is not visible in this
   listing. */
751 /* One of the files on MATCH FILES. */
754 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
755 struct mtf_file *next_min; /* Next in the chain of minimums. */
757 int type; /* One of MTF_*. */
758 struct variable **by; /* List of BY variables for this file. */
759 struct file_handle *handle; /* File handle. */
760 struct any_reader *reader; /* File reader. */
761 struct dictionary *dict; /* Dictionary from system file. */
764 char *in_name; /* Variable name. */
765 struct variable *in_var; /* Variable (in master dictionary). */
767 struct ccase input; /* Input record. */
/* Whole-procedure state for MATCH FILES: the list of input files,
   the BY count, the FIRST/LAST names, the master dictionary, the
   output casefile and output case, and the sequence-number
   bookkeeping used by mtf_processing().
   NOTE(review): the comment on seq_num ("Have we initialized this
   variable?") does not describe how the member is used below, where
   it is compared with entries of seq_nums; it looks stale.
   NOTE(review): the struct header line is not visible in this
   listing. */
770 /* MATCH FILES procedure. */
773 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
774 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
776 bool ok; /* False if I/O error occurs. */
778 size_t by_cnt; /* Number of variables on BY subcommand. */
780 /* Names of FIRST, LAST variables. */
781 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
783 struct dictionary *dict; /* Dictionary of output file. */
784 struct casefile *output; /* MATCH FILES output. */
785 struct ccase mtf_case; /* Case used for output. */
787 unsigned seq_num; /* Have we initialized this variable? */
788 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
/* Forward declarations for the MATCH FILES helpers defined below. */
791 static bool mtf_free (struct mtf_proc *);
792 static bool mtf_close_file (struct mtf_file *);
793 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
794 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
796 static bool mtf_read_nonactive_records (void *);
797 static bool mtf_processing_finish (void *);
798 static bool mtf_processing (const struct ccase *, void *);
800 static char *var_type_description (struct variable *);
802 static void set_master (struct variable *, struct variable *master);
803 static struct variable *get_master (struct variable *);
/* Outline of the visible code:
   - Each FILE or TABLE subcommand allocates a struct mtf_file and
     links it into the list so that FILEs precede TABLEs.  The
     active file may be named only once and only if a source exists;
     other inputs are opened with any_reader_open().  Per-file
     RENAME and IN subcommands follow, and each file's dictionary is
     merged into the master dictionary.
   - BY (at most once) resolves the BY variables in every file's
     dictionary; FIRST and LAST record a name each (at most once);
     MAP, DROP and KEEP are also recognized.
   - BY is required when TABLE or IN was used.
   - IN variables are added to the master dictionary, the output
     casefile and case are created, and the merge is run either
     through procedure() (active file in use) or directly through
     mtf_processing_finish().
   - On success mtf.dict replaces default_dict and the output
     casefile becomes the new source.
   NOTE(review): many short lines (braces, jumps, labels, several
   conditions) and the last lines of some comments are not visible
   in this listing; confirm control flow against the full file. */
805 /* Parse and execute the MATCH FILES command. */
807 cmd_match_files (void)
810 struct mtf_file *first_table = NULL;
811 struct mtf_file *iter;
813 bool used_active_file = false;
814 bool saw_table = false;
819 mtf.head = mtf.tail = NULL;
823 mtf.dict = dict_create ();
825 case_nullify (&mtf.mtf_case);
828 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
832 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
834 struct mtf_file *file = xmalloc (sizeof *file);
836 if (lex_match_id ("FILE"))
837 file->type = MTF_FILE;
838 else if (lex_match_id ("TABLE"))
840 file->type = MTF_TABLE;
851 file->in_name = NULL;
853 case_nullify (&file->input);
855 /* FILEs go first, then TABLEs. */
856 if (file->type == MTF_TABLE || first_table == NULL)
859 file->prev = mtf.tail;
861 mtf.tail->next = file;
863 if (mtf.head == NULL)
865 if (file->type == MTF_TABLE && first_table == NULL)
870 assert (file->type == MTF_FILE);
871 file->next = first_table;
872 file->prev = first_table->prev;
873 if (first_table->prev)
874 first_table->prev->next = file;
877 first_table->prev = file;
885 if (used_active_file)
887 msg (SE, _("The active file may not be specified more "
891 used_active_file = true;
893 if (!proc_has_source ())
895 msg (SE, _("Cannot specify the active file since no active "
896 "file has been defined."));
900 if (proc_make_temporary_transformations_permanent ())
902 _("MATCH FILES may not be used after TEMPORARY when "
903 "the active file is an input source. "
904 "Temporary transformations will be made permanent."));
906 file->dict = default_dict;
910 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
911 if (file->handle == NULL)
914 file->reader = any_reader_open (file->handle, &file->dict);
915 if (file->reader == NULL)
918 case_create (&file->input, dict_get_next_value_idx (file->dict));
921 while (lex_match ('/'))
922 if (lex_match_id ("RENAME"))
924 if (!rename_variables (file->dict))
927 else if (lex_match_id ("IN"))
936 if (file->in_name != NULL)
938 msg (SE, _("Multiple IN subcommands for a single FILE or "
942 file->in_name = xstrdup (tokid);
947 mtf_merge_dictionary (mtf.dict, file);
952 if (lex_match (T_BY))
954 struct variable **by;
958 msg (SE, _("BY may appear at most once."));
963 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
964 PV_NO_DUPLICATE | PV_NO_SCRATCH))
967 for (iter = mtf.head; iter != NULL; iter = iter->next)
971 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
973 for (i = 0; i < mtf.by_cnt; i++)
975 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
976 if (iter->by[i] == NULL)
978 msg (SE, _("File %s lacks BY variable %s."),
979 iter->handle ? fh_get_name (iter->handle) : "*",
988 else if (lex_match_id ("FIRST"))
990 if (mtf.first[0] != '\0')
992 msg (SE, _("FIRST may appear at most once."));
997 if (!lex_force_id ())
999 strcpy (mtf.first, tokid);
1002 else if (lex_match_id ("LAST"))
1004 if (mtf.last[0] != '\0')
1006 msg (SE, _("LAST may appear at most once."));
1011 if (!lex_force_id ())
1013 strcpy (mtf.last, tokid);
1016 else if (lex_match_id ("MAP"))
1020 else if (lex_match_id ("DROP"))
1022 if (!drop_variables (mtf.dict))
1025 else if (lex_match_id ("KEEP"))
1027 if (!keep_variables (mtf.dict))
1036 if (!lex_match ('/') && token != '.')
1038 lex_end_of_command ();
1043 if (mtf.by_cnt == 0)
1047 msg (SE, _("BY is required when TABLE is specified."));
1052 msg (SE, _("BY is required when IN is specified."));
1057 /* Set up mapping from each file's variables to master
1059 for (iter = mtf.head; iter != NULL; iter = iter->next)
1061 struct dictionary *d = iter->dict;
1064 for (i = 0; i < dict_get_var_cnt (d); i++)
1066 struct variable *v = dict_get_var (d, i);
1067 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1073 /* Add IN variables to master dictionary. */
1074 for (iter = mtf.head; iter != NULL; iter = iter->next)
1075 if (iter->in_name != NULL)
1077 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
/* NOTE(review): in the branch below iter->in_var is null, yet the
   message argument reads iter->in_var->name, which dereferences
   that null pointer.  iter->in_name holds the requested name and
   looks like the intended argument. */
1078 if (iter->in_var == NULL)
1080 msg (SE, _("IN variable name %s duplicates an "
1081 "existing variable name."),
1082 iter->in_var->name);
1085 iter->in_var->print = iter->in_var->write
1086 = make_output_format (FMT_F, 1, 0);
1089 /* MATCH FILES performs an n-way merge on all its input files.
1092 1. Read one input record from every input FILE.
1094 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1096 3. Find the FILE input record(s) that have minimum BY
1097 values. Store all the values from these input records into
1100 4. For every TABLE, read another record as long as the BY values
1101 on the TABLE's input record are less than the FILEs' BY values.
1102 If an exact match is found, store all the values from the TABLE
1103 input record into the output record.
1105 5. Write the output record.
1107 6. Read another record from each input file FILE and TABLE that
1108 we stored values from above. If we come to the end of one of the
1109 input files, remove it from the list of input files.
1111 7. Repeat from step 2.
1113 Unfortunately, this algorithm can't be implemented in a
1114 straightforward way because there's no function to read a
1115 record from the active file. Instead, it has to be written
1118 FIXME: For merging large numbers of files (more than 10?) a
1119 better algorithm would use a heap for finding minimum
1122 if (!used_active_file)
1123 discard_variables ();
1125 dict_compact_values (mtf.dict);
1126 mtf.output = casefile_create (dict_get_next_value_idx (mtf.dict));
1127 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1128 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1130 if (!mtf_read_nonactive_records (&mtf))
1133 if (used_active_file)
1135 proc_set_sink (create_case_sink (&null_sink_class, default_dict, NULL));
1136 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1139 ok = mtf_processing_finish (&mtf);
1141 discard_variables ();
1143 dict_destroy (default_dict);
1144 default_dict = mtf.dict;
1146 proc_set_source (storage_source_create (mtf.output));
1149 if (!mtf_free (&mtf))
1151 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1155 return CMD_CASCADING_FAILURE;
/* Completes the merge once no more active-file cases will arrive:
   the list entry with a null handle (the active file) is removed
   with mtf_delete_file_in_place(), then mtf_processing() is called
   with a null case for as long as the head of the list is an
   MTF_FILE entry.
   NOTE(review): the returns and any `break' after the deletion are
   not visible in this listing. */
1158 /* Repeats 2...7 an arbitrary number of times. */
1160 mtf_processing_finish (void *mtf_)
1162 struct mtf_proc *mtf = mtf_;
1163 struct mtf_file *iter;
1165 /* Find the active file and delete it. */
1166 for (iter = mtf->head; iter; iter = iter->next)
1167 if (iter->handle == NULL)
1169 if (!mtf_delete_file_in_place (mtf, &iter))
1174 while (mtf->head && mtf->head->type == MTF_FILE)
1175 if (!mtf_processing (NULL, mtf))
/* Formats a short description of V ("numeric", or "string with
   width N") into one of two static 32-byte buffers.
   NOTE(review): how the buffer S is selected, and the return
   statement, are not visible in this listing; the last line of the
   comment below is also missing from the listing. */
1181 /* Return a string in a static buffer describing V's variable type and
1184 var_type_description (struct variable *v)
1186 static char buf[2][32];
1193 if (v->type == NUMERIC)
1194 strcpy (s, "numeric");
1197 assert (v->type == ALPHA);
1198 sprintf (s, "string with width %d", v->width);
/* The status is computed before the reader is closed: it is true
   when there is no reader or the reader reports no error.  The
   dictionary is destroyed only when the file has a handle, i.e. it
   is not the entry whose dictionary was set to default_dict in
   cmd_match_files().
   NOTE(review): the remaining frees and the return statement are
   not visible in this listing. */
1203 /* Closes FILE and frees its associated data.
1204 Returns true if successful, false if an I/O error
1205 occurred on FILE. */
1207 mtf_close_file (struct mtf_file *file)
1209 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1211 any_reader_close (file->reader);
1212 if (file->handle != NULL)
1213 dict_destroy (file->dict);
1214 case_destroy (&file->input);
1215 free (file->in_name);
/* Walks the file list closing each entry with mtf_close_file(),
   then destroys the master dictionary, the output case and the
   sequence-number array.
   NOTE(review): the loop's advance of `next', the status handling
   and the return are not visible in this listing; the last line of
   the comment below is also missing from the listing. */
1220 /* Free all the data for the MATCH FILES procedure.
1221 Returns true if successful, false if an I/O error
1224 mtf_free (struct mtf_proc *mtf)
1226 struct mtf_file *iter, *next;
1229 for (iter = mtf->head; iter; iter = next)
1232 assert (iter->dict != mtf->dict);
1233 if (!mtf_close_file (iter))
1238 dict_destroy (mtf->dict);
1239 case_destroy (&mtf->mtf_case);
1240 free (mtf->seq_nums);
/* Besides unlinking F and fixing up head/tail, this clears F's
   contribution to the output case: its IN variable (if any) is set
   to 0, and each of F's variables that has a master variable gets
   its output value reset (strings are filled with spaces).  The
   result is that of mtf_close_file().
   NOTE(review): the conditions around the unlinking, the update of
   *FILE, and the numeric reset value are not visible in this
   listing. */
1245 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1246 file in the chain, or to NULL if was the last in the chain.
1247 Returns true if successful, false if an I/O error occurred. */
1249 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1251 struct mtf_file *f = *file;
1255 f->prev->next = f->next;
1257 f->next->prev = f->prev;
1259 mtf->head = f->next;
1261 mtf->tail = f->prev;
1264 if (f->in_var != NULL)
1265 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1266 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1268 struct variable *v = dict_get_var (f->dict, i);
1269 struct variable *mv = get_master (v);
1272 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1274 if (v->type == NUMERIC)
1277 memset (out->s, ' ', v->width);
1281 return mtf_close_file (f);
/* For every list entry that has a handle, reads one record into its
   input case; an entry whose read yields no record is removed with
   mtf_delete_file_in_place().  The loop stops early once `ok'
   becomes false.
   NOTE(review): the initialization of `ok', the advance of `next'
   and the return are not visible in this listing. */
1284 /* Read a record from every input file except the active file.
1285 Returns true if successful, false if an I/O error occurred. */
1287 mtf_read_nonactive_records (void *mtf_)
1289 struct mtf_proc *mtf = mtf_;
1290 struct mtf_file *iter, *next;
1293 for (iter = mtf->head; ok && iter != NULL; iter = next)
1296 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1297 if (!mtf_delete_file_in_place (mtf, &iter))
/* For each side, the file's own input case is used unless it is
   null, in which case C stands in for it; the two cases are then
   compared on the files' BY variables with case_compare_2dict().
   NOTE(review): the assertion tests A and B for null only after
   both have been used to form CA and CB, and both are dereferenced
   again in the final call, so it cannot guard those uses.  Confirm
   what invariant it is meant to express. */
1303 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1304 if A == B, 1 if A > B. */
1306 mtf_compare_BY_values (struct mtf_proc *mtf,
1307 struct mtf_file *a, struct mtf_file *b,
1308 const struct ccase *c)
1310 const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1311 const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1312 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1313 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
/* C is the current active-file case, or null when called from
   mtf_processing_finish().  An input whose own `input' case is null
   takes its data from C instead.

   Two chains threaded through next_min are built on each pass: the
   "min" chain of inputs whose BY values are minimal (their values
   are copied to the output case and their IN variables set to 1)
   and the "max" chain of the others (missing or blank values, IN
   variables set to 0).  seq_nums[] records, per master variable,
   the pass in which it was last written, so that a variable is
   stored at most once per pass.  The loop repeats until an
   active-file record has been consumed or no MTF_FILE entry remains
   at the head of the list.

   NOTE(review): many short lines (braces, comparisons on `cmp',
   jumps, returns, the sequence-number increment) and the last lines
   of some comments are not visible in this listing; confirm details
   against the full file. */
1316 /* Perform one iteration of steps 3...7 above.
1317 Returns true if successful, false if an I/O error occurred. */
1319 mtf_processing (const struct ccase *c, void *mtf_)
1321 struct mtf_proc *mtf = mtf_;
1323 /* Do we need another record from the active file? */
1324 bool read_active_file;
1326 assert (mtf->head != NULL);
1327 if (mtf->head->type == MTF_TABLE)
1332 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1333 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1334 struct mtf_file *iter, *next;
1336 read_active_file = false;
1338 /* 3. Find the FILE input record(s) that have minimum BY
1339 values. Store all the values from these input records into
1340 the output record. */
1341 min_head = min_tail = mtf->head;
1342 max_head = max_tail = NULL;
1343 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1346 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1350 max_tail = max_tail->next_min = iter;
1352 max_head = max_tail = iter;
1355 min_tail = min_tail->next_min = iter;
1360 max_tail->next_min = min_head;
1361 max_tail = min_tail;
1365 max_head = min_head;
1366 max_tail = min_tail;
1368 min_head = min_tail = iter;
1372 /* 4. For every TABLE, read another record as long as the BY
1373 values on the TABLE's input record are less than the FILEs'
1374 BY values. If an exact match is found, store all the values
1375 from the TABLE input record into the output record. */
1376 for (; iter != NULL; iter = next)
1378 assert (iter->type == MTF_TABLE);
1383 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1387 max_tail = max_tail->next_min = iter;
1389 max_head = max_tail = iter;
1392 min_tail = min_tail->next_min = iter;
1395 if (iter->handle == NULL)
1397 if (any_reader_read (iter->reader, &iter->input))
1399 if (!mtf_delete_file_in_place (mtf, &iter))
1406 /* Next sequence number. */
1409 /* Store data to all the records we are using. */
1411 min_tail->next_min = NULL;
1412 for (iter = min_head; iter; iter = iter->next_min)
1416 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1418 struct variable *v = dict_get_var (iter->dict, i);
1419 struct variable *mv = get_master (v);
1421 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1423 const struct ccase *record
1424 = case_is_null (&iter->input) ? c : &iter->input;
1425 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1427 mtf->seq_nums[mv->index] = mtf->seq_num;
1428 if (v->type == NUMERIC)
1429 out->f = case_num (record, v->fv);
1431 memcpy (out->s, case_str (record, v->fv), v->width);
1434 if (iter->in_var != NULL)
1435 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
1437 if (iter->type == MTF_FILE && iter->handle == NULL)
1438 read_active_file = true;
1441 /* Store missing values to all the records we're not
1444 max_tail->next_min = NULL;
1445 for (iter = max_head; iter; iter = iter->next_min)
1449 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1451 struct variable *v = dict_get_var (iter->dict, i);
1452 struct variable *mv = get_master (v);
1454 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1456 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1457 mtf->seq_nums[mv->index] = mtf->seq_num;
1459 if (v->type == NUMERIC)
1462 memset (out->s, ' ', v->width);
1465 if (iter->in_var != NULL)
1466 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1469 /* 5. Write the output record. */
1470 casefile_append (mtf->output, &mtf->mtf_case);
1472 /* 6. Read another record from each input file FILE and TABLE
1473 that we stored values from above. If we come to the end of
1474 one of the input files, remove it from the list of input
1476 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1478 next = iter->next_min;
1479 if (iter->reader != NULL
1480 && !any_reader_read (iter->reader, &iter->input))
1481 if (!mtf_delete_file_in_place (mtf, &iter))
1485 while (!read_active_file
1486 && mtf->head != NULL && mtf->head->type == MTF_FILE);
/* Visible behavior: M takes D's file label if it has none; D's
   documents are either installed as-is or appended to M's; then for
   every variable of D, a same-named variable in M is checked for a
   width mismatch (reported as an error) and otherwise inherits D's
   value labels, missing values and label where M has none, while a
   variable absent from M is cloned into it.

   NOTE(review): the error message calls fh_get_name (f->handle)
   without a null check, although cmd_match_files() prints "*" for
   entries whose handle is null (the active file).  If F can be the
   active file here, this passes a null handle.
   NOTE(review): the visible test compares widths only, while the
   message speaks of "type or width"; confirm whether a type
   comparison exists on a line missing from this listing.
   NOTE(review): declarations, several conditions, the handling of
   scratch variables, the release of new_docs and the returns are
   not visible in this listing. */
1491 /* Merge the dictionary for file F into master dictionary M. */
1493 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1495 struct dictionary *d = f->dict;
1496 const char *d_docs, *m_docs;
1499 if (dict_get_label (m) == NULL)
1500 dict_set_label (m, dict_get_label (d));
1502 d_docs = dict_get_documents (d);
1503 m_docs = dict_get_documents (m);
1507 dict_set_documents (m, d_docs);
1513 new_len = strlen (m_docs) + strlen (d_docs);
1514 new_docs = xmalloc (new_len + 1);
1515 strcpy (new_docs, m_docs);
1516 strcat (new_docs, d_docs);
1517 dict_set_documents (m, new_docs);
1522 for (i = 0; i < dict_get_var_cnt (d); i++)
1524 struct variable *dv = dict_get_var (d, i);
1525 struct variable *mv = dict_lookup_var (m, dv->name);
1527 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1532 if (mv->width != dv->width)
1534 msg (SE, _("Variable %s in file %s (%s) has different "
1535 "type or width from the same variable in "
1536 "earlier file (%s)."),
1537 dv->name, fh_get_name (f->handle),
1538 var_type_description (dv), var_type_description (mv));
1542 if (dv->width == mv->width)
1544 if (val_labs_count (dv->val_labs)
1545 && !val_labs_count (mv->val_labs))
1547 val_labs_destroy (mv->val_labs);
1548 mv->val_labs = val_labs_copy (dv->val_labs);
1550 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1551 mv_copy (&mv->miss, &dv->miss);
1554 if (dv->label && !mv->label)
1555 mv->label = xstrdup (dv->label);
1558 mv = dict_clone_var_assert (m, dv, dv->name);
/* Stores MASTER as V's auxiliary data via var_attach_aux(), with a
   null third argument. */
1564 /* Marks V's master variable as MASTER. */
1566 set_master (struct variable *v, struct variable *master)
1568 var_attach_aux (v, master, NULL);
/* Callers in this file treat a null result as "V has no master
   variable".
   NOTE(review): the function body is not visible in this listing. */
1571 /* Returns the master variable corresponding to V,
1572 as set with set_master(). */
1573 static struct variable *
1574 get_master (struct variable *v)
/* Description and members of struct case_map.  finish_case_map()
   fills `map' with -1 for destination indexes that have no source.
   NOTE(review): the opening and closing lines of the descriptive
   comment below and the struct header line are not visible in this
   listing.  In that text, "corresponds for one dictionary" reads
   like a slip for "corresponds to one dictionary". */
1583 A case map copies data from a case that corresponds for one
1584 dictionary to a case that corresponds to a second dictionary
1585 derived from the first by, optionally, deleting, reordering,
1586 or renaming variables. (No new variables may be created.)
1592 size_t value_cnt; /* Number of values in map. */
1593 int *map; /* For each destination index, the
1594 corresponding source index. */
/* For every variable of D, allocates an int and attaches it to the
   variable as auxiliary data, with var_dtor_free as the aux
   destructor.  finish_case_map() later reads it back as the
   variable's source index.
   NOTE(review): the statement that stores the value into *src_fv is
   not visible in this listing; presumably it records v->fv. */
1597 /* Prepares dictionary D for producing a case map. Afterward,
1598 the caller may delete, reorder, or rename variables within D
1599 at will before using finish_case_map() to produce the case
1602 Uses D's aux members, which must otherwise not be in use. */
1604 start_case_map (struct dictionary *d)
1606 size_t var_cnt = dict_get_var_cnt (d);
1609 for (i = 0; i < var_cnt; i++)
1611 struct variable *v = dict_get_var (d, i);
1612 int *src_fv = xmalloc (sizeof *src_fv);
1614 var_attach_aux (v, src_fv, var_dtor_free);
/* The map has one entry per value index of D.  For each variable,
   the source index saved by start_case_map() is detached and each
   of the variable's NV values is mapped from source to destination;
   the assertion checks that no destination slot is assigned twice.
   Trailing entries equal to -1 are trimmed from value_cnt.
   NOTE(review): the initialization of the entries, the bookkeeping
   that decides whether an identity map is returned as null, and the
   release of src_fv are not visible in this listing. */
1618 /* Produces a case map from dictionary D, which must have been
1619 previously prepared with start_case_map().
1621 Does not retain any reference to D, and clears the aux members
1622 set up by start_case_map().
1624 Returns the new case map, or a null pointer if no mapping is
1625 required (that is, no data has changed position). */
1626 static struct case_map *
1627 finish_case_map (struct dictionary *d)
1629 struct case_map *map;
1630 size_t var_cnt = dict_get_var_cnt (d);
1634 map = xmalloc (sizeof *map);
1635 map->value_cnt = dict_get_next_value_idx (d);
1636 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1637 for (i = 0; i < map->value_cnt; i++)
1641 for (i = 0; i < var_cnt; i++)
1643 struct variable *v = dict_get_var (d, i);
1644 int *src_fv = (int *) var_detach_aux (v);
1647 if (v->fv != *src_fv)
1650 for (idx = 0; idx < v->nv; idx++)
1652 int src_idx = *src_fv + idx;
1653 int dst_idx = v->fv + idx;
1655 assert (map->map[dst_idx] == -1);
1656 map->map[dst_idx] = src_idx;
1663 destroy_case_map (map);
1667 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
/* MAP, SRC and DST must all be non-null and SRC must differ from
   DST.  For each destination index below MAP->value_cnt, the value
   at the mapped source index is copied from SRC into DST.
   NOTE(review): a guard that skips entries with no source is
   presumably on a line missing from this listing (finish_case_map()
   uses -1 for such entries); confirm. */
1673 /* Maps from SRC to DST, applying case map MAP. */
1675 map_case (const struct case_map *map,
1676 const struct ccase *src, struct ccase *dst)
1680 assert (map != NULL);
1681 assert (src != NULL);
1682 assert (dst != NULL);
1683 assert (src != dst);
1685 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1687 int src_idx = map->map[dst_idx];
1689 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1693 /* Destroys case map MAP. */
1695 destroy_case_map (struct case_map *map)