1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/por-file-writer.h>
32 #include <data/procedure.h>
33 #include <data/settings.h>
34 #include <data/storage-stream.h>
35 #include <data/sys-file-writer.h>
36 #include <data/transformations.h>
37 #include <data/value-labels.h>
38 #include <data/variable.h>
39 #include <language/command.h>
40 #include <language/data-io/file-handle.h>
41 #include <language/lexer/lexer.h>
42 #include <language/lexer/variable-parser.h>
43 #include <libpspp/alloc.h>
44 #include <libpspp/compiler.h>
45 #include <libpspp/hash.h>
46 #include <libpspp/message.h>
47 #include <libpspp/message.h>
48 #include <libpspp/misc.h>
49 #include <libpspp/str.h>
52 #define _(msgid) gettext (msgid)
54 /* Rearranging and reducing a dictionary. */
/* Case-map helpers, defined later in this file: start_case_map() tags
   a dictionary before it is edited, finish_case_map() turns the edits
   into a case map, map_case() applies the map to a case, and
   destroy_case_map() releases it. */
55 static void start_case_map (struct dictionary *);
56 static struct case_map *finish_case_map (struct dictionary *);
57 static void map_case (const struct case_map *,
58 const struct ccase *, struct ccase *);
59 static void destroy_case_map (struct case_map *);
/* Parses one MAP, DROP, KEEP, or RENAME subcommand against a dictionary. */
61 static bool parse_dict_trim (struct dictionary *);
63 /* Reading system and portable files. */
65 /* Type of command. */
72 /* Case reader input program. */
/* Allocated by parse_read_command() and handed to the case source as
   its aux pointer; released through case_reader_pgm_free(). */
73 struct case_reader_pgm
75 struct any_reader *reader; /* File reader. */
76 struct case_map *map; /* Map from file dict to active file dict. */
/* BOUNCE receives the case as read from the file when MAP is non-null;
   map_case() then copies it into the destination case. */
77 struct ccase bounce; /* Bounce buffer. */
/* Forward declarations needed by parse_read_command(). */
80 static const struct case_source_class case_reader_source_class;
82 static void case_reader_pgm_free (struct case_reader_pgm *);
84 /* Parses a GET or IMPORT command. */
/* TYPE says which of the two commands is being parsed; it only affects
   whether the TYPE subcommand is accepted.  On failure the partially
   built reader program is freed and CMD_CASCADING_FAILURE is returned. */
86 parse_read_command (enum reader_command type)
88 struct case_reader_pgm *pgm = NULL;
89 struct file_handle *fh = NULL;
90 struct dictionary *dict = NULL;
/* FILE subcommand; a bare string token is also accepted as the file. */
96 if (lex_match_id ("FILE") || token == T_STRING)
100 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
/* TYPE is recognized on IMPORT only. */
104 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
108 if (lex_match_id ("COMM"))
110 else if (lex_match_id ("TAPE"))
114 lex_error (_("expecting COMM or TAPE"));
124 lex_sbc_missing ("FILE");
128 discard_variables ();
/* Open the file; any_reader_open() hands back the file's dictionary
   in DICT. */
130 pgm = xmalloc (sizeof *pgm);
131 pgm->reader = any_reader_open (fh, &dict);
133 case_nullify (&pgm->bounce);
134 if (pgm->reader == NULL)
137 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
/* Dictionary edits made by parse_dict_trim() between start_case_map()
   and finish_case_map() are captured in pgm->map. */
139 start_case_map (dict);
144 if (!parse_dict_trim (dict))
148 pgm->map = finish_case_map (dict);
/* Replace the active dictionary and install the reader as the source. */
150 dict_destroy (default_dict);
153 proc_set_source (create_case_source (&case_reader_source_class, pgm));
/* Failure path. */
158 case_reader_pgm_free (pgm);
161 return CMD_CASCADING_FAILURE;
164 /* Frees a struct case_reader_pgm. */
/* Closes the reader, destroys the case map, and destroys the bounce
   case owned by PGM. */
166 case_reader_pgm_free (struct case_reader_pgm *pgm)
170 any_reader_close (pgm->reader);
171 destroy_case_map (pgm->map);
172 case_destroy (&pgm->bounce);
177 /* Clears internal state related to case reader input procedure. */
/* SOURCE->aux is the struct case_reader_pgm installed by
   parse_read_command(). */
179 case_reader_source_destroy (struct case_source *source)
181 struct case_reader_pgm *pgm = source->aux;
182 case_reader_pgm_free (pgm);
185 /* Reads all the cases from the data file into C and passes them
186 to WRITE_CASE one by one, passing WC_DATA.
187 Returns true if successful, false if an I/O error occurred. */
189 case_reader_source_read (struct case_source *source,
191 write_case_func *write_case, write_case_data wc_data)
193 struct case_reader_pgm *pgm = source->aux;
/* Without a case map the file's case layout is used as-is, so read
   directly into C. */
199 if (pgm->map == NULL)
200 got_case = any_reader_read (pgm->reader, c);
/* Otherwise read into the bounce buffer and rearrange into C. */
203 got_case = any_reader_read (pgm->reader, &pgm->bounce);
205 map_case (pgm->map, &pgm->bounce, c);
210 ok = write_case (wc_data);
/* A reader error also counts as failure, even if every write_case()
   call succeeded. */
214 return ok && !any_reader_error (pgm->reader);
/* Case source class for GET and IMPORT; its read and destroy hooks are
   the two functions defined above. */
217 static const struct case_source_class case_reader_source_class =
221 case_reader_source_read,
222 case_reader_source_destroy,
229 return parse_read_command (GET_CMD);
236 return parse_read_command (IMPORT_CMD);
239 /* Writing system and portable files. */
241 /* Type of output file. */
/* Passed to parse_write_command(), which uses it to decide which
   format-specific subcommands are accepted. */
244 SYSFILE_WRITER, /* System file. */
245 PORFILE_WRITER /* Portable file. */
248 /* Type of a command. */
/* parse_output_trns() passes XFORM_CMD (XSAVE, XEXPORT);
   parse_output_proc() passes PROC_CMD (SAVE, EXPORT). */
251 XFORM_CMD, /* Transformation. */
252 PROC_CMD /* Procedure. */
255 /* File writer plus a case map. */
/* Built by parse_write_command(); released by case_writer_destroy(). */
258 struct any_writer *writer; /* File writer. */
259 struct case_map *map; /* Map to output file dictionary
260 (null pointer for identity mapping). */
261 struct ccase bounce; /* Bounce buffer for mapping (if needed). */
/* Closes AW's writer, then releases its case map and bounce case.
   The result of any_writer_close() is kept in OK. */
266 case_writer_destroy (struct case_writer *aw)
271 ok = any_writer_close (aw->writer);
272 destroy_case_map (aw->map);
273 case_destroy (&aw->bounce);
279 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
280 WRITER_TYPE identifies the type of file to write,
281 and COMMAND_TYPE identifies the type of command.
283 On success, returns a writer.
284 For procedures only, sets *RETAIN_UNSELECTED to true if cases
285 that would otherwise be excluded by FILTER or USE should be included.
288 On failure, returns a null pointer. */
289 static struct case_writer *
290 parse_write_command (enum writer_type writer_type,
291 enum command_type command_type,
292 bool *retain_unselected)
295 struct file_handle *handle; /* Output file. */
296 struct dictionary *dict; /* Dictionary for output file. */
297 struct case_writer *aw; /* Writer. */
299 /* Common options. */
300 bool print_map; /* Print map? TODO. */
301 bool print_short_names; /* Print long-to-short name map. TODO. */
302 struct sfm_write_options sysfile_opts;
303 struct pfm_write_options porfile_opts;
/* RETAIN_UNSELECTED must be supplied exactly when parsing a procedure. */
305 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
306 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
307 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
/* Default for procedures: keep unselected cases. */
309 if (command_type == PROC_CMD)
310 *retain_unselected = true;
/* Work on a private copy of the active dictionary. */
313 dict = dict_clone (default_dict);
314 aw = xmalloc (sizeof *aw);
317 case_nullify (&aw->bounce);
319 print_short_names = false;
320 sysfile_opts = sfm_writer_default_options ();
321 porfile_opts = pfm_writer_default_options ();
/* Tag variables before deleting scratch variables so that the case
   map built by finish_case_map() accounts for their removal. */
323 start_case_map (dict);
324 dict_delete_scratch_vars (dict);
/* Subcommands common to all four commands. */
329 if (lex_match_id ("OUTFILE"))
333 lex_sbc_only_once ("OUTFILE");
339 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
343 else if (lex_match_id ("NAMES"))
344 print_short_names = true;
345 else if (lex_match_id ("PERMISSIONS"))
350 if (lex_match_id ("READONLY"))
352 else if (lex_match_id ("WRITEABLE"))
356 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
/* The same permission setting is applied to both option sets. */
359 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
/* UNSELECTED is accepted for procedures only. */
361 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
364 if (lex_match_id ("RETAIN"))
365 *retain_unselected = true;
366 else if (lex_match_id ("DELETE"))
367 *retain_unselected = false;
370 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
/* Subcommands accepted for system files only. */
374 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
375 sysfile_opts.compress = true;
376 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
377 sysfile_opts.compress = false;
378 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
381 if (!lex_force_int ())
383 sysfile_opts.version = lex_integer ();
/* Subcommands accepted for portable files only. */
386 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
389 if (lex_match_id ("COMMUNICATIONS"))
390 porfile_opts.type = PFM_COMM;
391 else if (lex_match_id ("TAPE"))
392 porfile_opts.type = PFM_TAPE;
395 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
399 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
402 if (!lex_force_int ())
404 porfile_opts.digits = lex_integer ();
/* Anything else must be a dictionary-trimming subcommand. */
407 else if (!parse_dict_trim (dict))
410 if (!lex_match ('/'))
413 if (lex_end_of_command () != CMD_SUCCESS)
418 lex_sbc_missing ("OUTFILE");
/* Compact the output dictionary, then derive the case map and size
   the bounce case from the compacted layout. */
422 dict_compact_values (dict);
423 aw->map = finish_case_map (dict);
425 case_create (&aw->bounce, dict_get_next_value_idx (dict));
/* Ordinary files get a format-specific writer carrying the parsed
   options; any_writer_open() is used for the remaining case
   (presumably scratch handles -- the branch structure is not fully
   visible here). */
427 if (fh_get_referent (handle) == FH_REF_FILE)
432 aw->writer = any_writer_from_sfm_writer (
433 sfm_open_writer (handle, dict, sysfile_opts));
436 aw->writer = any_writer_from_pfm_writer (
437 pfm_open_writer (handle, dict, porfile_opts));
442 aw->writer = any_writer_open (handle, dict);
443 if (aw->writer == NULL)
/* Failure path. */
450 case_writer_destroy (aw);
455 /* Writes case C to writer AW. */
/* map_case() rearranges C into AW's bounce case; the result of
   any_writer_write() is returned to the caller. */
457 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
461 map_case (aw->map, c, &aw->bounce);
464 return any_writer_write (aw->writer, c);
467 /* SAVE and EXPORT. */
469 static bool output_proc (const struct ccase *, void *);
471 /* Parses and performs the SAVE or EXPORT procedure. */
/* Returns CMD_SUCCESS if the procedure ran without error, otherwise
   CMD_CASCADING_FAILURE. */
473 parse_output_proc (enum writer_type writer_type)
475 bool retain_unselected;
476 struct variable *saved_filter_variable;
477 struct case_writer *aw;
480 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
482 return CMD_CASCADING_FAILURE;
/* To retain unselected cases, the filter variable is cleared for the
   duration of the procedure and restored afterward. */
484 saved_filter_variable = dict_get_filter (default_dict);
485 if (retain_unselected)
486 dict_set_filter (default_dict, NULL);
487 ok = procedure (output_proc, aw);
488 dict_set_filter (default_dict, saved_filter_variable);
490 case_writer_destroy (aw);
491 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
494 /* Writes case C to file. */
/* Callback passed to procedure(); AW_ is the struct case_writer. */
496 output_proc (const struct ccase *c, void *aw_)
498 struct case_writer *aw = aw_;
499 return case_writer_write_case (aw, c);
505 return parse_output_proc (SYSFILE_WRITER);
511 return parse_output_proc (PORFILE_WRITER);
514 /* XSAVE and XEXPORT. */
516 /* Transformation. */
519 struct case_writer *aw; /* Writer. */
522 static trns_proc_func output_trns_proc;
523 static trns_free_func output_trns_free;
525 /* Parses the XSAVE or XEXPORT transformation command. */
/* On success the new transformation T owns the writer and is
   registered with output_trns_proc/output_trns_free. */
527 parse_output_trns (enum writer_type writer_type)
529 struct output_trns *t = xmalloc (sizeof *t);
/* Transformations never take UNSELECTED, hence the NULL argument. */
530 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
534 return CMD_CASCADING_FAILURE;
537 add_transformation (output_trns_proc, output_trns_free, t);
541 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
543 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
545 struct output_trns *t = trns_;
/* NOTE(review): the result of case_writer_write_case() is discarded,
   so a write failure here does not stop the transformation chain. */
546 case_writer_write_case (t->aw, c);
547 return TRNS_CONTINUE;
550 /* Frees an XSAVE or XEXPORT transformation.
551 Returns true if successful, false if an I/O error occurred. */
553 output_trns_free (void *trns_)
555 struct output_trns *t = trns_;
/* OK takes the result of destroying the writer. */
560 ok = case_writer_destroy (t->aw);
570 return parse_output_trns (SYSFILE_WRITER);
573 /* XEXPORT command. */
577 return parse_output_trns (PORFILE_WRITER);
580 static bool rename_variables (struct dictionary *dict);
581 static bool drop_variables (struct dictionary *dict);
582 static bool keep_variables (struct dictionary *dict);
584 /* Commands that read and write system files share a great deal
585 of common syntactic structure for rearranging and dropping
586 variables. This function parses this syntax and modifies DICT
587 appropriately. Returns true on success, false on failure. */
589 parse_dict_trim (struct dictionary *dict)
/* Dispatch on the subcommand keyword: MAP, DROP, KEEP, or RENAME. */
591 if (lex_match_id ("MAP"))
596 else if (lex_match_id ("DROP"))
597 return drop_variables (dict);
598 else if (lex_match_id ("KEEP"))
599 return keep_variables (dict);
600 else if (lex_match_id ("RENAME"))
601 return rename_variables (dict);
/* Unrecognized keyword. */
604 lex_error (_("expecting a valid subcommand"));
609 /* Parses and performs the RENAME subcommand of GET and SAVE. */
/* Also used by MATCH FILES for the per-file RENAME subcommand. */
611 rename_variables (struct dictionary *dict)
/* Single-variable form, OLD=NEW (presumably taken when no
   parenthesized group follows -- the guarding test is not visible
   here).  The new name must not already exist in DICT. */
629 v = parse_dict_variable (dict);
632 if (!lex_force_match ('=')
635 if (dict_lookup_var (dict, tokid) != NULL)
637 msg (SE, _("Cannot rename %s as %s because there already exists "
638 "a variable named %s. To rename variables with "
639 "overlapping names, use a single RENAME subcommand "
640 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
641 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
645 dict_rename_var (dict, v, tokid);
/* Parenthesized form: each group appends its old variables to V and
   its new names to NEW_NAMES (PV_APPEND), so NV and NN accumulate
   across groups. */
654 while (lex_match ('('))
658 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
660 if (!lex_match ('='))
662 msg (SE, _("`=' expected after variable list."));
665 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* NOTE(review): the counts are cast to unsigned but printed with %d. */
669 msg (SE, _("Number of variables on left side of `=' (%d) does not "
670 "match number of variables on right side (%d), in "
671 "parenthesized group %d of RENAME subcommand."),
672 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
675 if (!lex_force_match (')'))
/* All accumulated renames are applied in one call. */
680 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
682 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
/* Loop over the NN collected names (presumably to free them). */
688 for (i = 0; i < nn; i++)
696 /* Parses and performs the DROP subcommand of GET and SAVE.
697 Returns true if successful, false on failure.*/
699 drop_variables (struct dictionary *dict)
705 if (!parse_variables (dict, &v, &nv, PV_NONE))
707 dict_delete_vars (dict, v, nv);
/* An empty dictionary is reported as an error. */
710 if (dict_get_var_cnt (dict) == 0)
712 msg (SE, _("Cannot DROP all variables from dictionary."));
718 /* Parses and performs the KEEP subcommand of GET and SAVE.
719 Returns true if successful, false on failure.*/
721 keep_variables (struct dictionary *dict)
728 if (!parse_variables (dict, &v, &nv, PV_NONE))
731 /* Move the specified variables to the beginning. */
732 dict_reorder_vars (dict, v, nv);
734 /* Delete the remaining variables. */
/* V is reused to hold the variables at positions NV and up, i.e. the
   ones that were not named on KEEP. */
735 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
736 for (i = nv; i < dict_get_var_cnt (dict); i++)
737 v[i - nv] = dict_get_var (dict, i);
738 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* How an input file was named on MATCH FILES; stored in the type
   member of struct mtf_file. */
749 MTF_FILE, /* Specified on FILE= subcommand. */
750 MTF_TABLE /* Specified on TABLE= subcommand. */
753 /* One of the files on MATCH FILES. */
756 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
/* mtf_processing() also uses next_min to chain the files that do NOT
   have minimum BY values. */
757 struct mtf_file *next_min; /* Next in the chain of minimums. */
759 int type; /* One of MTF_*. */
760 struct variable **by; /* List of BY variables for this file. */
/* A null handle marks the active file (see mtf_processing_finish()). */
761 struct file_handle *handle; /* File handle. */
762 struct any_reader *reader; /* File reader. */
/* For the active file, dict is set to default_dict instead. */
763 struct dictionary *dict; /* Dictionary from system file. */
/* Name given on the IN subcommand, and the variable created for it. */
766 char *in_name; /* Variable name. */
767 struct variable *in_var; /* Variable (in master dictionary). */
769 struct ccase input; /* Input record. */
772 /* MATCH FILES procedure. */
775 struct mtf_file *head; /* First file mentioned on FILE or TABLE. */
776 struct mtf_file *tail; /* Last file mentioned on FILE or TABLE. */
778 bool ok; /* False if I/O error occurs. */
780 size_t by_cnt; /* Number of variables on BY subcommand. */
782 /* Names of FIRST, LAST variables. */
783 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
785 struct dictionary *dict; /* Dictionary of output file. */
786 struct casefile *output; /* MATCH FILES output. */
787 struct ccase mtf_case; /* Case used for output. */
/* mtf_processing() stamps seq_nums[mv->index] with the current seq_num
   when it stores a value for master variable mv, and skips variables
   whose stamp already equals seq_num.  The two trailing comments below
   are kept from the original and read as if swapped. */
789 unsigned seq_num; /* Have we initialized this variable? */
790 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
793 static bool mtf_free (struct mtf_proc *);
794 static bool mtf_close_file (struct mtf_file *);
795 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
796 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
798 static bool mtf_read_nonactive_records (void *);
799 static bool mtf_processing_finish (void *);
800 static bool mtf_processing (const struct ccase *, void *);
802 static char *var_type_description (struct variable *);
804 static void set_master (struct variable *, struct variable *master);
805 static struct variable *get_master (struct variable *);
807 /* Parse and execute the MATCH FILES command. */
/* Outline: parse the FILE/TABLE specifications (each optionally
   followed by RENAME and IN), merging every file's dictionary into the
   master dictionary mtf.dict; parse the remaining subcommands (BY,
   FIRST, LAST, MAP, DROP, KEEP); link each file variable to its master
   variable; create the IN variables; then run the merge and install
   the result as the new active file.
   Returns CMD_SUCCESS on success, CMD_CASCADING_FAILURE otherwise. */
809 cmd_match_files (void)
812 struct mtf_file *first_table = NULL;
813 struct mtf_file *iter;
815 bool used_active_file = false;
816 bool saw_table = false;
821 mtf.head = mtf.tail = NULL;
825 mtf.dict = dict_create ();
827 case_nullify (&mtf.mtf_case);
830 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
/* One iteration per FILE or TABLE specification. */
834 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
836 struct mtf_file *file = xmalloc (sizeof *file);
838 if (lex_match_id ("FILE"))
839 file->type = MTF_FILE;
840 else if (lex_match_id ("TABLE"))
842 file->type = MTF_TABLE;
853 file->in_name = NULL;
855 case_nullify (&file->input);
857 /* FILEs go first, then TABLEs. */
/* A TABLE, or any file seen before the first TABLE, is appended at
   the tail; a FILE seen after a TABLE is inserted just before the
   first TABLE. */
858 if (file->type == MTF_TABLE || first_table == NULL)
861 file->prev = mtf.tail;
863 mtf.tail->next = file;
865 if (mtf.head == NULL)
867 if (file->type == MTF_TABLE && first_table == NULL)
872 assert (file->type == MTF_FILE);
873 file->next = first_table;
874 file->prev = first_table->prev;
875 if (first_table->prev)
876 first_table->prev->next = file;
879 first_table->prev = file;
/* The active file may be named at most once and must exist. */
887 if (used_active_file)
889 msg (SE, _("The active file may not be specified more "
893 used_active_file = true;
895 if (!proc_has_source ())
897 msg (SE, _("Cannot specify the active file since no active "
898 "file has been defined."));
902 if (proc_make_temporary_transformations_permanent ())
904 _("MATCH FILES may not be used after TEMPORARY when "
905 "the active file is an input source. "
906 "Temporary transformations will be made permanent."));
908 file->dict = default_dict;
/* Otherwise the specification names a file handle: open a reader and
   allocate the input case for it. */
912 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
913 if (file->handle == NULL)
916 file->reader = any_reader_open (file->handle, &file->dict);
917 if (file->reader == NULL)
920 case_create (&file->input, dict_get_next_value_idx (file->dict));
/* Per-file subcommands. */
923 while (lex_match ('/'))
924 if (lex_match_id ("RENAME"))
926 if (!rename_variables (file->dict))
929 else if (lex_match_id ("IN"))
938 if (file->in_name != NULL)
940 msg (SE, _("Multiple IN subcommands for a single FILE or "
944 file->in_name = xstrdup (tokid);
/* NOTE(review): the int result of mtf_merge_dictionary() is not
   examined here -- confirm whether a type/width conflict should abort
   the command. */
949 mtf_merge_dictionary (mtf.dict, file);
/* Subcommands that apply to the command as a whole. */
954 if (lex_match (T_BY))
956 struct variable **by;
960 msg (SE, _("BY may appear at most once."));
965 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
966 PV_NO_DUPLICATE | PV_NO_SCRATCH))
/* Resolve each BY variable by name in every input file's own
   dictionary. */
969 for (iter = mtf.head; iter != NULL; iter = iter->next)
973 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
975 for (i = 0; i < mtf.by_cnt; i++)
977 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
978 if (iter->by[i] == NULL)
980 msg (SE, _("File %s lacks BY variable %s."),
981 iter->handle ? fh_get_name (iter->handle) : "*",
990 else if (lex_match_id ("FIRST"))
992 if (mtf.first[0] != '\0')
994 msg (SE, _("FIRST may appear at most once."));
999 if (!lex_force_id ())
1001 strcpy (mtf.first, tokid);
1004 else if (lex_match_id ("LAST"))
1006 if (mtf.last[0] != '\0')
1008 msg (SE, _("LAST may appear at most once."));
1013 if (!lex_force_id ())
1015 strcpy (mtf.last, tokid);
1018 else if (lex_match_id ("MAP"))
1022 else if (lex_match_id ("DROP"))
1024 if (!drop_variables (mtf.dict))
1027 else if (lex_match_id ("KEEP"))
1029 if (!keep_variables (mtf.dict))
1038 if (!lex_match ('/') && token != '.')
1040 lex_end_of_command ();
/* Without BY, neither TABLE nor IN is allowed. */
1045 if (mtf.by_cnt == 0)
1049 msg (SE, _("BY is required when TABLE is specified."));
1054 msg (SE, _("BY is required when IN is specified."));
1059 /* Set up mapping from each file's variables to master variables. */
1061 for (iter = mtf.head; iter != NULL; iter = iter->next)
1063 struct dictionary *d = iter->dict;
1066 for (i = 0; i < dict_get_var_cnt (d); i++)
1068 struct variable *v = dict_get_var (d, i);
1069 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1075 /* Add IN variables to master dictionary. */
1076 for (iter = mtf.head; iter != NULL; iter = iter->next)
1077 if (iter->in_name != NULL)
1079 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1080 if (iter->in_var == NULL)
/* iter->in_var is null on this path, so the name must be taken from
   iter->in_name. */
1082 msg (SE, _("IN variable name %s duplicates an "
1083 "existing variable name."),
1084 iter->in_name);
1087 iter->in_var->print = iter->in_var->write
1088 = make_output_format (FMT_F, 1, 0);
1091 /* MATCH FILES performs an n-way merge on all its input files.
1094 1. Read one input record from every input FILE.
1096 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1098 3. Find the FILE input record(s) that have minimum BY
1099 values. Store all the values from these input records into the output record.
1102 4. For every TABLE, read another record as long as the BY values
1103 on the TABLE's input record are less than the FILEs' BY values.
1104 If an exact match is found, store all the values from the TABLE
1105 input record into the output record.
1107 5. Write the output record.
1109 6. Read another record from each input file FILE and TABLE that
1110 we stored values from above. If we come to the end of one of the
1111 input files, remove it from the list of input files.
1113 7. Repeat from step 2.
1115 Unfortunately, this algorithm can't be implemented in a
1116 straightforward way because there's no function to read a
1117 record from the active file. Instead, it has to be written as a per-case callback, mtf_processing(), driven by procedure().
1120 FIXME: For merging large numbers of files (more than 10?) a
1121 better algorithm would use a heap for finding minimum values. */
1124 if (!used_active_file)
1125 discard_variables ();
/* Allocate the output casefile, per-variable sequence numbers, and
   the output case according to the compacted master dictionary. */
1127 dict_compact_values (mtf.dict);
1128 mtf.output = casefile_create (dict_get_next_value_idx (mtf.dict));
1129 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1130 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1132 if (!mtf_read_nonactive_records (&mtf))
/* With the active file as an input, procedure() feeds its cases to
   mtf_processing(); mtf_processing_finish() then drains the rest. */
1135 if (used_active_file)
1137 proc_set_sink (create_case_sink (&null_sink_class, default_dict, NULL));
1138 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1141 ok = mtf_processing_finish (&mtf);
/* Install the master dictionary and the merged cases as the new
   active file. */
1143 discard_variables ();
1145 dict_destroy (default_dict);
1146 default_dict = mtf.dict;
1148 proc_set_source (storage_source_create (mtf.output));
1151 if (!mtf_free (&mtf))
1153 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Failure path. */
1157 return CMD_CASCADING_FAILURE;
1160 /* Repeats 2...7 an arbitrary number of times. */
/* The step numbers refer to the algorithm outline in
   cmd_match_files(). */
1162 mtf_processing_finish (void *mtf_)
1164 struct mtf_proc *mtf = mtf_;
1165 struct mtf_file *iter;
1167 /* Find the active file and delete it. */
1168 for (iter = mtf->head; iter; iter = iter->next)
1169 if (iter->handle == NULL)
1171 if (!mtf_delete_file_in_place (mtf, &iter))
/* Keep merging while a FILE (not a TABLE) heads the list. */
1176 while (mtf->head && mtf->head->type == MTF_FILE)
1177 if (!mtf_processing (NULL, mtf))
1183 /* Return a string in a static buffer describing V's variable type and width. */
1186 var_type_description (struct variable *v)
/* Two buffers are kept, presumably so that two descriptions can be
   live at once, as in the msg() call in mtf_merge_dictionary(). */
1188 static char buf[2][32];
1195 if (v->type == NUMERIC)
1196 strcpy (s, "numeric");
1199 assert (v->type == ALPHA);
1200 sprintf (s, "string with width %d", v->width);
1205 /* Closes FILE and frees its associated data.
1206 Returns true if successful, false if an I/O error
1207 occurred on FILE. */
1209 mtf_close_file (struct mtf_file *file)
/* A file with no reader (the active file) counts as error-free. */
1211 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1213 any_reader_close (file->reader);
/* Only dictionaries obtained from a file handle are destroyed here;
   with a null handle, dict is left alone. */
1214 if (file->handle != NULL)
1215 dict_destroy (file->dict);
1216 case_destroy (&file->input);
1217 free (file->in_name);
1222 /* Free all the data for the MATCH FILES procedure.
1223 Returns true if successful, false if an I/O error occurred. */
1226 mtf_free (struct mtf_proc *mtf)
1228 struct mtf_file *iter, *next;
/* Close every remaining input file. */
1231 for (iter = mtf->head; iter; iter = next)
/* No input file may share the master dictionary, which is destroyed
   separately below. */
1234 assert (iter->dict != mtf->dict);
1235 if (!mtf_close_file (iter))
1240 dict_destroy (mtf->dict);
1241 case_destroy (&mtf->mtf_case);
1242 free (mtf->seq_nums);
1247 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1248 file in the chain, or to NULL if it was the last in the chain.
1249 Returns true if successful, false if an I/O error occurred. */
1251 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1253 struct mtf_file *f = *file;
/* Unlink F from the doubly linked list, updating head and tail. */
1257 f->prev->next = f->next;
1259 f->next->prev = f->prev;
1261 mtf->head = f->next;
1263 mtf->tail = f->prev;
/* Clear F's contribution to the output case: its IN variable becomes 0
   and its string variables are blanked. */
1266 if (f->in_var != NULL)
1267 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1268 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1270 struct variable *v = dict_get_var (f->dict, i);
1271 struct variable *mv = get_master (v);
1274 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1276 if (v->type == NUMERIC)
1279 memset (out->s, ' ', v->width);
1283 return mtf_close_file (f);
1286 /* Read a record from every input file except the active file.
1287 Returns true if successful, false if an I/O error occurred. */
1289 mtf_read_nonactive_records (void *mtf_)
1291 struct mtf_proc *mtf = mtf_;
1292 struct mtf_file *iter, *next;
1295 for (iter = mtf->head; ok && iter != NULL; iter = next)
/* A file with a handle whose first read yields no case is removed
   from the list immediately. */
1298 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1299 if (!mtf_delete_file_in_place (mtf, &iter))
1305 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1306 if A == B, 1 if A > B. */
/* A file whose input case is null takes its values from C instead. */
1308 mtf_compare_BY_values (struct mtf_proc *mtf,
1309 struct mtf_file *a, struct mtf_file *b,
1310 const struct ccase *c)
1312 const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1313 const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
/* NOTE(review): this assertion runs after A and B have already been
   dereferenced above, and it tolerates one null among A, B, C. */
1314 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1315 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1318 /* Perform one iteration of steps 3...7 above.
1319 Returns true if successful, false if an I/O error occurred. */
/* C is the current case from the active file; mtf_processing_finish()
   passes a null pointer.  Files whose own input case is null read
   their values from C. */
1321 mtf_processing (const struct ccase *c, void *mtf_)
1323 struct mtf_proc *mtf = mtf_;
1325 /* Do we need another record from the active file? */
1326 bool read_active_file;
1328 assert (mtf->head != NULL);
1329 if (mtf->head->type == MTF_TABLE)
/* Both chains below are threaded through the next_min member. */
1334 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1335 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1336 struct mtf_file *iter, *next;
1338 read_active_file = false;
1340 /* 3. Find the FILE input record(s) that have minimum BY
1341 values. Store all the values from these input records into
1342 the output record. */
1343 min_head = min_tail = mtf->head;
1344 max_head = max_tail = NULL;
1345 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
1348 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1352 max_tail = max_tail->next_min = iter;
1354 max_head = max_tail = iter;
1357 min_tail = min_tail->next_min = iter;
1362 max_tail->next_min = min_head;
1363 max_tail = min_tail;
1367 max_head = min_head;
1368 max_tail = min_tail;
1370 min_head = min_tail = iter;
1374 /* 4. For every TABLE, read another record as long as the BY
1375 values on the TABLE's input record are less than the FILEs'
1376 BY values. If an exact match is found, store all the values
1377 from the TABLE input record into the output record. */
1378 for (; iter != NULL; iter = next)
1380 assert (iter->type == MTF_TABLE);
1385 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1389 max_tail = max_tail->next_min = iter;
1391 max_head = max_tail = iter;
1394 min_tail = min_tail->next_min = iter;
1397 if (iter->handle == NULL)
1399 if (any_reader_read (iter->reader, &iter->input))
1401 if (!mtf_delete_file_in_place (mtf, &iter))
1408 /* Next sequence number. */
1411 /* Store data to all the records we are using. */
1413 min_tail->next_min = NULL;
1414 for (iter = min_head; iter; iter = iter->next_min)
1418 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1420 struct variable *v = dict_get_var (iter->dict, i);
1421 struct variable *mv = get_master (v);
/* seq_nums[] records which master variables were already stored for
   this output record, so the first file in the chain wins. */
1423 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1425 const struct ccase *record
1426 = case_is_null (&iter->input) ? c : &iter->input;
1427 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1429 mtf->seq_nums[mv->index] = mtf->seq_num;
1430 if (v->type == NUMERIC)
1431 out->f = case_num (record, v->fv);
1433 memcpy (out->s, case_str (record, v->fv), v->width);
/* A contributing file's IN variable is set to 1. */
1436 if (iter->in_var != NULL)
1437 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
/* The active file (null handle) contributed, so its next case must
   come from the caller. */
1439 if (iter->type == MTF_FILE && iter->handle == NULL)
1440 read_active_file = true;
1443 /* Store missing values to all the records we're not using. */
/* Walk the chain of non-contributing files; for each master variable
   not already stored for this output record, blank string values and
   stamp the sequence number. */
1446 max_tail->next_min = NULL;
1447 for (iter = max_head; iter; iter = iter->next_min)
1451 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1453 struct variable *v = dict_get_var (iter->dict, i);
1454 struct variable *mv = get_master (v);
1456 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1458 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1459 mtf->seq_nums[mv->index] = mtf->seq_num;
1461 if (v->type == NUMERIC)
1464 memset (out->s, ' ', v->width);
/* A non-contributing file's IN variable is set to 0. */
1467 if (iter->in_var != NULL)
1468 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1471 /* 5. Write the output record. */
1472 casefile_append (mtf->output, &mtf->mtf_case);
1474 /* 6. Read another record from each input file FILE and TABLE
1475 that we stored values from above. If we come to the end of
1476 one of the input files, remove it from the list of input files. */
/* Advance every FILE in the minimum chain that has its own reader; a
   file that yields no further case is removed from the list. */
1478 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1480 next = iter->next_min;
1481 if (iter->reader != NULL
1482 && !any_reader_read (iter->reader, &iter->input))
1483 if (!mtf_delete_file_in_place (mtf, &iter))
/* Loop again unless a new active-file case is needed or no FILE
   remains at the head of the list. */
1487 while (!read_active_file
1488 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1493 /* Merge the dictionary for file F into master dictionary M. */
/* M takes F's file label if it has none, accumulates F's documents,
   and gains a clone of every non-scratch variable it does not already
   contain.  For a variable present in both, a width mismatch is
   reported as an error; otherwise M's variable inherits value labels,
   missing values, and a variable label from F's wherever its own are
   empty. */
1495 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1497 struct dictionary *d = f->dict;
1498 const char *d_docs, *m_docs;
1501 if (dict_get_label (m) == NULL)
1502 dict_set_label (m, dict_get_label (d));
1504 d_docs = dict_get_documents (d);
1505 m_docs = dict_get_documents (m);
1509 dict_set_documents (m, d_docs);
/* Both dictionaries have documents: concatenate M's and then D's. */
1515 new_len = strlen (m_docs) + strlen (d_docs);
1516 new_docs = xmalloc (new_len + 1);
1517 strcpy (new_docs, m_docs);
1518 strcat (new_docs, d_docs);
1519 dict_set_documents (m, new_docs);
1524 for (i = 0; i < dict_get_var_cnt (d); i++)
1526 struct variable *dv = dict_get_var (d, i);
1527 struct variable *mv = dict_lookup_var (m, dv->name);
1529 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1534 if (mv->width != dv->width)
/* The active file has a null handle; name it "*" as the BY-variable
   diagnostic in cmd_match_files() does, rather than passing a null
   handle to fh_get_name(). */
1536 msg (SE, _("Variable %s in file %s (%s) has different "
1537 "type or width from the same variable in "
1538 "earlier file (%s)."),
1539 dv->name, f->handle ? fh_get_name (f->handle) : "*",
1540 var_type_description (dv), var_type_description (mv));
1544 if (dv->width == mv->width)
1546 if (val_labs_count (dv->val_labs)
1547 && !val_labs_count (mv->val_labs))
1549 val_labs_destroy (mv->val_labs);
1550 mv->val_labs = val_labs_copy (dv->val_labs);
1552 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1553 mv_copy (&mv->miss, &dv->miss);
1556 if (dv->label && !mv->label)
1557 mv->label = xstrdup (dv->label);
/* The variable is not in M yet: clone it under the same name. */
1560 mv = dict_clone_var_assert (m, dv, dv->name);
1566 /* Marks V's master variable as MASTER. */
/* The link is kept in V's aux slot; the third argument is null where
   start_case_map() passes a destructor (var_dtor_free), so presumably
   MASTER is not freed through V. */
1568 set_master (struct variable *v, struct variable *master)
1570 var_attach_aux (v, master, NULL);
1573 /* Returns the master variable corresponding to V,
1574 as set with set_master(). */
1575 static struct variable *
1576 get_master (struct variable *v)
1585 /* A case map copies data from a case that corresponds to one
1586 dictionary to a case that corresponds to a second dictionary
1587 derived from the first by, optionally, deleting, reordering,
1588 or renaming variables. (No new variables may be created.) */
1594 size_t value_cnt; /* Number of values in map. */
1595 int *map; /* For each destination index, the
1596 corresponding source index. */
1599 /* Prepares dictionary D for producing a case map. Afterward,
1600 the caller may delete, reorder, or rename variables within D
1601 at will before using finish_case_map() to produce the case map.
1604 Uses D's aux members, which must otherwise not be in use. */
1606 start_case_map (struct dictionary *d)
1608 size_t var_cnt = dict_get_var_cnt (d);
1611 for (i = 0; i < var_cnt; i++)
1613 struct variable *v = dict_get_var (d, i);
/* Each variable gets a heap-allocated int as aux data, registered with
   var_dtor_free; finish_case_map() reads it back as the variable's
   source value index. */
1614 int *src_fv = xmalloc (sizeof *src_fv);
1616 var_attach_aux (v, src_fv, var_dtor_free);
1620 /* Produces a case map from dictionary D, which must have been
1621 previously prepared with start_case_map().
1623 Does not retain any reference to D, and clears the aux members
1624 set up by start_case_map().
1626 Returns the new case map, or a null pointer if no mapping is
1627 required (that is, no data has changed position). */
1628 static struct case_map *
1629 finish_case_map (struct dictionary *d)
1631 struct case_map *map;
1632 size_t var_cnt = dict_get_var_cnt (d);
/* One map entry per value slot of the final dictionary layout. */
1636 map = xmalloc (sizeof *map);
1637 map->value_cnt = dict_get_next_value_idx (d);
1638 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1639 for (i = 0; i < map->value_cnt; i++)
1643 for (i = 0; i < var_cnt; i++)
1645 struct variable *v = dict_get_var (d, i);
1646 int *src_fv = (int *) var_detach_aux (v);
/* Test whether the variable's value index differs from the one saved
   by start_case_map(). */
1649 if (v->fv != *src_fv)
/* A variable occupies v->nv consecutive value slots; map each one. */
1652 for (idx = 0; idx < v->nv; idx++)
1654 int src_idx = *src_fv + idx;
1655 int dst_idx = v->fv + idx;
/* -1 marks a destination slot that has not been assigned yet. */
1657 assert (map->map[dst_idx] == -1);
1658 map->map[dst_idx] = src_idx;
1665 destroy_case_map (map);
/* Loop over trailing destination slots that were never assigned. */
1669 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1675 /* Maps from SRC to DST, applying case map MAP. */
/* MAP, SRC, and DST must all be non-null, and SRC and DST must be
   different cases. */
1677 map_case (const struct case_map *map,
1678 const struct ccase *src, struct ccase *dst)
1682 assert (map != NULL);
1683 assert (src != NULL);
1684 assert (dst != NULL);
1685 assert (src != dst);
/* map->map[dst_idx] gives the source slot for each destination slot. */
1687 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1689 int src_idx = map->map[dst_idx];
1691 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1695 /* Destroys case map MAP. */
1697 destroy_case_map (struct case_map *map)