1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
24 #include <data/any-reader.h>
25 #include <data/any-writer.h>
26 #include <data/case-sink.h>
27 #include <data/case-source.h>
28 #include <data/case.h>
29 #include <data/casefile.h>
30 #include <data/dictionary.h>
31 #include <data/por-file-writer.h>
32 #include <data/procedure.h>
33 #include <data/settings.h>
34 #include <data/storage-stream.h>
35 #include <data/sys-file-writer.h>
36 #include <data/transformations.h>
37 #include <data/value-labels.h>
38 #include <data/variable.h>
39 #include <language/command.h>
40 #include <language/data-io/file-handle.h>
41 #include <language/lexer/lexer.h>
42 #include <language/lexer/variable-parser.h>
43 #include <libpspp/alloc.h>
44 #include <libpspp/assertion.h>
45 #include <libpspp/compiler.h>
46 #include <libpspp/hash.h>
47 #include <libpspp/message.h>
48 #include <libpspp/message.h>
49 #include <libpspp/misc.h>
50 #include <libpspp/str.h>
53 #define _(msgid) gettext (msgid)
55 /* Rearranging and reducing a dictionary. */
56 static void start_case_map (struct dictionary *);
57 static struct case_map *finish_case_map (struct dictionary *);
58 static void map_case (const struct case_map *,
59 const struct ccase *, struct ccase *);
60 static void destroy_case_map (struct case_map *);
62 static bool parse_dict_trim (struct dictionary *);
64 /* Reading system and portable files. */
66 /* Type of command. */
73 /* Case reader input program: the state behind the case source
   that parse_read_command() installs for GET and IMPORT. */
74 struct case_reader_pgm
76 struct any_reader *reader; /* File reader. */
77 struct case_map *map; /* Map from file dict to active file dict;
                            null when records are read straight
                            into the destination case. */
78 struct ccase bounce; /* Bounce buffer: receives each record
                           before it is mapped, when MAP is
                           non-null. */
81 static const struct case_source_class case_reader_source_class;
83 static void case_reader_pgm_free (struct case_reader_pgm *);
85 /* Parses a GET or IMPORT command.
   TYPE selects the command; the TYPE subcommand is recognized
   only when it is IMPORT_CMD.

   The dictionary comes from any_reader_open(), is trimmed by the
   subcommands that parse_dict_trim() understands, and is bracketed
   by start_case_map()/finish_case_map() so that cases read from
   the file can be rearranged to match it.  A case source built on
   case_reader_source_class is then installed.

   The error path frees the partially built reader program and
   returns CMD_CASCADING_FAILURE. */
87 parse_read_command (enum reader_command type)
89 struct case_reader_pgm *pgm = NULL;
90 struct file_handle *fh = NULL;
91 struct dictionary *dict = NULL;
/* FILE may be introduced by its keyword or given as a bare
   string. */
97 if (lex_match_id ("FILE") || token == T_STRING)
101 fh = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
105 else if (type == IMPORT_CMD && lex_match_id ("TYPE"))
109 if (lex_match_id ("COMM"))
111 else if (lex_match_id ("TAPE"))
115 lex_error (_("expecting COMM or TAPE"));
125 lex_sbc_missing ("FILE");
/* NOTE(review): the active file's variables are discarded here,
   before any_reader_open() below is known to have succeeded.
   Confirm that losing them when the open fails is intended. */
129 discard_variables ();
131 pgm = xmalloc (sizeof *pgm);
132 pgm->reader = any_reader_open (fh, &dict);
/* The bounce buffer is nullified before the reader is checked so
   that case_reader_pgm_free() can always destroy it. */
134 case_nullify (&pgm->bounce);
135 if (pgm->reader == NULL)
138 case_create (&pgm->bounce, dict_get_next_value_idx (dict));
/* Record the file's layout, let the subcommands reshape DICT,
   then derive the map from the old layout to the new one. */
140 start_case_map (dict);
145 if (!parse_dict_trim (dict))
149 pgm->map = finish_case_map (dict);
151 dict_destroy (default_dict);
154 proc_set_source (create_case_source (&case_reader_source_class, pgm));
159 case_reader_pgm_free (pgm);
162 return CMD_CASCADING_FAILURE;
165 /* Frees a struct case_reader_pgm. */
167 case_reader_pgm_free (struct case_reader_pgm *pgm)
171 any_reader_close (pgm->reader);
172 destroy_case_map (pgm->map);
173 case_destroy (&pgm->bounce);
178 /* Clears internal state related to case reader input procedure. */
180 case_reader_source_destroy (struct case_source *source)
182 struct case_reader_pgm *pgm = source->aux;
183 case_reader_pgm_free (pgm);
186 /* Reads all the cases from the data file into C and passes them
187 to WRITE_CASE one by one, passing WC_DATA.
188 Returns true if successful, false if an I/O error occurred. */
190 case_reader_source_read (struct case_source *source,
192 write_case_func *write_case, write_case_data wc_data)
194 struct case_reader_pgm *pgm = source->aux;
200 if (pgm->map == NULL)
201 got_case = any_reader_read (pgm->reader, c);
204 got_case = any_reader_read (pgm->reader, &pgm->bounce);
206 map_case (pgm->map, &pgm->bounce, c);
211 ok = write_case (wc_data);
215 return ok && !any_reader_error (pgm->reader);
/* Case source class for GET and IMPORT: cases are produced by
   case_reader_source_read() and the reader program is released by
   case_reader_source_destroy(). */
218 static const struct case_source_class case_reader_source_class =
222 case_reader_source_read,
223 case_reader_source_destroy,
/* Run the shared GET/IMPORT parser in GET mode and hand back its
   command result. */
230 return parse_read_command (GET_CMD);
/* Run the shared GET/IMPORT parser in IMPORT mode and hand back
   its command result. */
237 return parse_read_command (IMPORT_CMD);
240 /* Writing system and portable files. */
242 /* Type of output file, as passed to parse_write_command(). */
245 SYSFILE_WRITER, /* System file. */
246 PORFILE_WRITER /* Portable file. */
249 /* Type of a command, as passed to parse_write_command(). */
252 XFORM_CMD, /* Transformation (used by parse_output_trns()). */
253 PROC_CMD /* Procedure (used by parse_output_proc()). */
256 /* File writer plus a case map.  Allocated by
   parse_write_command() and released by case_writer_destroy(). */
259 struct any_writer *writer; /* File writer. */
260 struct case_map *map; /* Map to output file dictionary
                            (null pointer for identity mapping). */
262 struct ccase bounce; /* Bounce buffer that holds the mapped case
                           when MAP is non-null. */
267 case_writer_destroy (struct case_writer *aw)
272 ok = any_writer_close (aw->writer);
273 destroy_case_map (aw->map);
274 case_destroy (&aw->bounce);
280 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
   WRITER_TYPE identifies the type of file to write,
   and COMMAND_TYPE identifies the type of command.

   On success, returns a writer.
   For procedures only, sets *RETAIN_UNSELECTED to true if cases
   that would otherwise be excluded by FILTER or USE should be
   retained (the default, and UNSELECTED=RETAIN) or to false for
   UNSELECTED=DELETE.  RETAIN_UNSELECTED must be null exactly
   when COMMAND_TYPE is XFORM_CMD, as asserted below.

   On failure, returns a null pointer. */
290 static struct case_writer *
291 parse_write_command (enum writer_type writer_type,
292 enum command_type command_type,
293 bool *retain_unselected)
296 struct file_handle *handle; /* Output file. */
297 struct dictionary *dict; /* Dictionary for output file. */
298 struct case_writer *aw; /* Writer. */
300 /* Common options. */
301 bool print_map; /* Print map? TODO. */
302 bool print_short_names; /* Print long-to-short name map. TODO. */
303 struct sfm_write_options sysfile_opts;
304 struct pfm_write_options porfile_opts;
306 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
307 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
308 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
310 if (command_type == PROC_CMD)
311 *retain_unselected = true;
/* Work on a private copy of the active dictionary so that the
   trimming subcommands do not disturb default_dict. */
314 dict = dict_clone (default_dict);
315 aw = xmalloc (sizeof *aw);
318 case_nullify (&aw->bounce);
320 print_short_names = false;
321 sysfile_opts = sfm_writer_default_options ();
322 porfile_opts = pfm_writer_default_options ();
/* The case map must be started before any variable is removed,
   so that it records the original layout. */
324 start_case_map (dict);
325 dict_delete_scratch_vars (dict);
330 if (lex_match_id ("OUTFILE"))
334 lex_sbc_only_once ("OUTFILE");
340 handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
344 else if (lex_match_id ("NAMES"))
345 print_short_names = true;
346 else if (lex_match_id ("PERMISSIONS"))
351 if (lex_match_id ("READONLY"))
353 else if (lex_match_id ("WRITEABLE"))
357 lex_error (_("expecting %s or %s"), "READONLY", "WRITEABLE");
/* The permission applies to whichever writer type is opened. */
360 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
/* UNSELECTED is accepted only for procedures. */
362 else if (command_type == PROC_CMD && lex_match_id ("UNSELECTED"))
365 if (lex_match_id ("RETAIN"))
366 *retain_unselected = true;
367 else if (lex_match_id ("DELETE"))
368 *retain_unselected = false;
371 lex_error (_("expecting %s or %s"), "RETAIN", "DELETE");
/* System-file-only subcommands. */
375 else if (writer_type == SYSFILE_WRITER && lex_match_id ("COMPRESSED"))
376 sysfile_opts.compress = true;
377 else if (writer_type == SYSFILE_WRITER && lex_match_id ("UNCOMPRESSED"))
378 sysfile_opts.compress = false;
379 else if (writer_type == SYSFILE_WRITER && lex_match_id ("VERSION"))
382 if (!lex_force_int ())
384 sysfile_opts.version = lex_integer ();
/* Portable-file-only subcommands. */
387 else if (writer_type == PORFILE_WRITER && lex_match_id ("TYPE"))
390 if (lex_match_id ("COMMUNICATIONS"))
391 porfile_opts.type = PFM_COMM;
392 else if (lex_match_id ("TAPE"))
393 porfile_opts.type = PFM_TAPE;
396 lex_error (_("expecting %s or %s"), "COMM", "TAPE");
400 else if (writer_type == PORFILE_WRITER && lex_match_id ("DIGITS"))
403 if (!lex_force_int ())
405 porfile_opts.digits = lex_integer ();
/* Anything else must be one of the dictionary-trimming
   subcommands handled by parse_dict_trim(). */
408 else if (!parse_dict_trim (dict))
411 if (!lex_match ('/'))
414 if (lex_end_of_command () != CMD_SUCCESS)
419 lex_sbc_missing ("OUTFILE");
/* Compact the trimmed dictionary, then derive the map from the
   active file's layout to the output layout. */
423 dict_compact_values (dict);
424 aw->map = finish_case_map (dict);
426 case_create (&aw->bounce, dict_get_next_value_idx (dict));
/* Handles that refer to files are opened with the writer chosen
   by the command; other handles go through any_writer_open(). */
428 if (fh_get_referent (handle) == FH_REF_FILE)
433 aw->writer = any_writer_from_sfm_writer (
434 sfm_open_writer (handle, dict, sysfile_opts));
437 aw->writer = any_writer_from_pfm_writer (
438 pfm_open_writer (handle, dict, porfile_opts));
443 aw->writer = any_writer_open (handle, dict);
444 if (aw->writer == NULL)
451 case_writer_destroy (aw);
456 /* Writes case C to writer AW. */
458 case_writer_write_case (struct case_writer *aw, const struct ccase *c)
462 map_case (aw->map, c, &aw->bounce);
465 return any_writer_write (aw->writer, c);
468 /* SAVE and EXPORT. */
470 static bool output_proc (const struct ccase *, void *);
472 /* Parses and performs the SAVE or EXPORT procedure. */
474 parse_output_proc (enum writer_type writer_type)
476 bool retain_unselected;
477 struct variable *saved_filter_variable;
478 struct case_writer *aw;
481 aw = parse_write_command (writer_type, PROC_CMD, &retain_unselected);
483 return CMD_CASCADING_FAILURE;
485 saved_filter_variable = dict_get_filter (default_dict);
486 if (retain_unselected)
487 dict_set_filter (default_dict, NULL);
488 ok = procedure (output_proc, aw);
489 dict_set_filter (default_dict, saved_filter_variable);
491 case_writer_destroy (aw);
492 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Procedure callback for SAVE and EXPORT: writes case C through
   the case writer passed as AW_.  Returns the result of
   case_writer_write_case(). */
static bool
output_proc (const struct ccase *c, void *aw_)
{
  return case_writer_write_case (aw_, c);
}
/* Run the shared SAVE/EXPORT procedure with a system file
   writer. */
506 return parse_output_proc (SYSFILE_WRITER);
/* Run the shared SAVE/EXPORT procedure with a portable file
   writer. */
512 return parse_output_proc (PORFILE_WRITER);
515 /* XSAVE and XEXPORT. */
517 /* State of an XSAVE or XEXPORT transformation. */
520 struct case_writer *aw; /* Writer that receives each case. */
523 static trns_proc_func output_trns_proc;
524 static trns_free_func output_trns_free;
526 /* Parses the XSAVE or XEXPORT transformation command. */
528 parse_output_trns (enum writer_type writer_type)
530 struct output_trns *t = xmalloc (sizeof *t);
531 t->aw = parse_write_command (writer_type, XFORM_CMD, NULL);
535 return CMD_CASCADING_FAILURE;
538 add_transformation (output_trns_proc, output_trns_free, t);
542 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
544 output_trns_proc (void *trns_, struct ccase *c, int case_num UNUSED)
546 struct output_trns *t = trns_;
547 case_writer_write_case (t->aw, c);
548 return TRNS_CONTINUE;
551 /* Frees an XSAVE or XEXPORT transformation.
552 Returns true if successful, false if an I/O error occurred. */
554 output_trns_free (void *trns_)
556 struct output_trns *t = trns_;
561 ok = case_writer_destroy (t->aw);
/* Run the shared XSAVE/XEXPORT parser with a system file
   writer. */
571 return parse_output_trns (SYSFILE_WRITER);
574 /* XEXPORT command: run the shared XSAVE/XEXPORT parser with a
   portable file writer. */
578 return parse_output_trns (PORFILE_WRITER);
581 static bool rename_variables (struct dictionary *dict);
582 static bool drop_variables (struct dictionary *dict);
583 static bool keep_variables (struct dictionary *dict);
/* Parses one of the subcommands shared by the commands that read
   and write system files for rearranging and dropping variables
   (MAP, DROP, KEEP, RENAME), and modifies DICT accordingly.  MAP
   is accepted but does nothing yet.
   Returns true on success, false on failure, including when the
   current token is none of these subcommands. */
static bool
parse_dict_trim (struct dictionary *dict)
{
  if (lex_match_id ("MAP"))
    {
      /* FIXME. */
      return true;
    }
  if (lex_match_id ("DROP"))
    return drop_variables (dict);
  if (lex_match_id ("KEEP"))
    return keep_variables (dict);
  if (lex_match_id ("RENAME"))
    return rename_variables (dict);

  lex_error (_("expecting a valid subcommand"));
  return false;
}
610 /* Parses and performs the RENAME subcommand of GET and SAVE.
   Two forms appear below: a single "old=new" pair, which is
   applied with dict_rename_var(), and one or more parenthesized
   groups "(old... = new...)", which are collected across all
   groups and applied together with dict_rename_vars(). */
612 rename_variables (struct dictionary *dict)
630 v = parse_dict_variable (dict);
633 if (!lex_force_match ('=')
/* In the single-pair form the new name must not already exist in
   DICT. */
636 if (dict_lookup_var (dict, tokid) != NULL)
638 msg (SE, _("Cannot rename %s as %s because there already exists "
639 "a variable named %s. To rename variables with "
640 "overlapping names, use a single RENAME subcommand "
641 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
642 "\"/RENAME (A B C=B C A)\"."), v->name, tokid, tokid);
646 dict_rename_var (dict, v, tokid);
/* Parenthesized form: PV_APPEND makes each group add to the old
   and new name lists built by the groups before it. */
655 while (lex_match ('('))
659 if (!parse_variables (dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
661 if (!lex_match ('='))
663 msg (SE, _("`=' expected after variable list."));
666 if (!parse_DATA_LIST_vars (&new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* NOTE(review): the counts below are cast to unsigned but printed
   with %d; confirm whether the format should be %u. */
670 msg (SE, _("Number of variables on left side of `=' (%d) does not "
671 "match number of variables on right side (%d), in "
672 "parenthesized group %d of RENAME subcommand."),
673 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
676 if (!lex_force_match (')'))
/* All groups are applied in one call, so names may be swapped
   among the listed variables. */
681 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
683 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
689 for (i = 0; i < nn; i++)
697 /* Parses and performs the DROP subcommand of GET and SAVE.
698 Returns true if successful, false on failure.*/
700 drop_variables (struct dictionary *dict)
706 if (!parse_variables (dict, &v, &nv, PV_NONE))
708 dict_delete_vars (dict, v, nv);
711 if (dict_get_var_cnt (dict) == 0)
713 msg (SE, _("Cannot DROP all variables from dictionary."));
719 /* Parses and performs the KEEP subcommand of GET and SAVE.
720 Returns true if successful, false on failure.*/
722 keep_variables (struct dictionary *dict)
729 if (!parse_variables (dict, &v, &nv, PV_NONE))
732 /* Move the specified variables to the beginning. */
733 dict_reorder_vars (dict, v, nv);
735 /* Delete the remaining variables. */
736 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
737 for (i = nv; i < dict_get_var_cnt (dict); i++)
738 v[i - nv] = dict_get_var (dict, i);
739 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* How an input of MATCH FILES was specified; stored in the
   `type' member of struct mtf_file. */
750 MTF_FILE, /* Specified on FILE= subcommand. */
751 MTF_TABLE /* Specified on TABLE= subcommand. */
754 /* One of the files on MATCH FILES. */
757 struct mtf_file *next, *prev; /* Next, previous in the list of files. */
758 struct mtf_file *next_min; /* Next in the chain of minimums. */
760 int type; /* One of MTF_*. */
761 struct variable **by; /* List of BY variables for this file. */
762 struct file_handle *handle; /* File handle; null for the active file. */
763 struct any_reader *reader; /* File reader; opened only when HANDLE
                                 is non-null. */
764 struct dictionary *dict; /* Dictionary from system file, or
                               default_dict for the active file. */
767 char *in_name; /* Name given on the IN subcommand, or null. */
768 struct variable *in_var; /* IN variable (in master dictionary). */
770 struct ccase input; /* Input record; created only for files
                          that have a handle. */
773 /* MATCH FILES procedure. */
776 struct mtf_file *head; /* First file in the list (FILEs are kept
                             ahead of TABLEs). */
777 struct mtf_file *tail; /* Last file in the list. */
779 bool ok; /* False if I/O error occurs. */
781 size_t by_cnt; /* Number of variables on BY subcommand. */
783 /* Names of FIRST, LAST variables. */
784 char first[LONG_NAME_LEN + 1], last[LONG_NAME_LEN + 1];
786 struct dictionary *dict; /* Dictionary of output file. */
787 struct casefile *output; /* MATCH FILES output. */
788 struct ccase mtf_case; /* Case used for output. */
790 unsigned seq_num; /* Sequence number of the output record being
                        built; compared with SEQ_NUMS[] to tell
                        whether a variable was already stored. */
791 unsigned *seq_nums; /* Sequence numbers for each var in dict. */
794 static bool mtf_free (struct mtf_proc *);
795 static bool mtf_close_file (struct mtf_file *);
796 static int mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
797 static bool mtf_delete_file_in_place (struct mtf_proc *, struct mtf_file **);
799 static bool mtf_read_nonactive_records (void *);
800 static bool mtf_processing_finish (void *);
801 static bool mtf_processing (const struct ccase *, void *);
803 static char *var_type_description (struct variable *);
805 static void set_master (struct variable *, struct variable *master);
806 static struct variable *get_master (struct variable *);
808 /* Parse and execute the MATCH FILES command. */
810 cmd_match_files (void)
813 struct mtf_file *first_table = NULL;
814 struct mtf_file *iter;
816 bool used_active_file = false;
817 bool saw_table = false;
822 mtf.head = mtf.tail = NULL;
826 mtf.dict = dict_create ();
828 case_nullify (&mtf.mtf_case);
831 dict_set_case_limit (mtf.dict, dict_get_case_limit (default_dict));
835 && (lex_id_match ("FILE", tokid) || lex_id_match ("TABLE", tokid)))
837 struct mtf_file *file = xmalloc (sizeof *file);
839 if (lex_match_id ("FILE"))
840 file->type = MTF_FILE;
841 else if (lex_match_id ("TABLE"))
843 file->type = MTF_TABLE;
854 file->in_name = NULL;
856 case_nullify (&file->input);
858 /* FILEs go first, then TABLEs. */
859 if (file->type == MTF_TABLE || first_table == NULL)
862 file->prev = mtf.tail;
864 mtf.tail->next = file;
866 if (mtf.head == NULL)
868 if (file->type == MTF_TABLE && first_table == NULL)
873 assert (file->type == MTF_FILE);
874 file->next = first_table;
875 file->prev = first_table->prev;
876 if (first_table->prev)
877 first_table->prev->next = file;
880 first_table->prev = file;
888 if (used_active_file)
890 msg (SE, _("The active file may not be specified more "
894 used_active_file = true;
896 if (!proc_has_source ())
898 msg (SE, _("Cannot specify the active file since no active "
899 "file has been defined."));
903 if (proc_make_temporary_transformations_permanent ())
905 _("MATCH FILES may not be used after TEMPORARY when "
906 "the active file is an input source. "
907 "Temporary transformations will be made permanent."));
909 file->dict = default_dict;
913 file->handle = fh_parse (FH_REF_FILE | FH_REF_SCRATCH);
914 if (file->handle == NULL)
917 file->reader = any_reader_open (file->handle, &file->dict);
918 if (file->reader == NULL)
921 case_create (&file->input, dict_get_next_value_idx (file->dict));
924 while (lex_match ('/'))
925 if (lex_match_id ("RENAME"))
927 if (!rename_variables (file->dict))
930 else if (lex_match_id ("IN"))
939 if (file->in_name != NULL)
941 msg (SE, _("Multiple IN subcommands for a single FILE or "
945 file->in_name = xstrdup (tokid);
950 mtf_merge_dictionary (mtf.dict, file);
955 if (lex_match (T_BY))
957 struct variable **by;
961 msg (SE, _("BY may appear at most once."));
966 if (!parse_variables (mtf.dict, &by, &mtf.by_cnt,
967 PV_NO_DUPLICATE | PV_NO_SCRATCH))
970 for (iter = mtf.head; iter != NULL; iter = iter->next)
974 iter->by = xnmalloc (mtf.by_cnt, sizeof *iter->by);
976 for (i = 0; i < mtf.by_cnt; i++)
978 iter->by[i] = dict_lookup_var (iter->dict, by[i]->name);
979 if (iter->by[i] == NULL)
981 msg (SE, _("File %s lacks BY variable %s."),
982 iter->handle ? fh_get_name (iter->handle) : "*",
991 else if (lex_match_id ("FIRST"))
993 if (mtf.first[0] != '\0')
995 msg (SE, _("FIRST may appear at most once."));
1000 if (!lex_force_id ())
1002 strcpy (mtf.first, tokid);
1005 else if (lex_match_id ("LAST"))
1007 if (mtf.last[0] != '\0')
1009 msg (SE, _("LAST may appear at most once."));
1014 if (!lex_force_id ())
1016 strcpy (mtf.last, tokid);
1019 else if (lex_match_id ("MAP"))
1023 else if (lex_match_id ("DROP"))
1025 if (!drop_variables (mtf.dict))
1028 else if (lex_match_id ("KEEP"))
1030 if (!keep_variables (mtf.dict))
1039 if (!lex_match ('/') && token != '.')
1041 lex_end_of_command ();
1046 if (mtf.by_cnt == 0)
1050 msg (SE, _("BY is required when TABLE is specified."));
1055 msg (SE, _("BY is required when IN is specified."));
1060 /* Set up mapping from each file's variables to master
1062 for (iter = mtf.head; iter != NULL; iter = iter->next)
1064 struct dictionary *d = iter->dict;
1067 for (i = 0; i < dict_get_var_cnt (d); i++)
1069 struct variable *v = dict_get_var (d, i);
1070 struct variable *mv = dict_lookup_var (mtf.dict, v->name);
1076 /* Add IN variables to master dictionary. */
1077 for (iter = mtf.head; iter != NULL; iter = iter->next)
1078 if (iter->in_name != NULL)
1080 iter->in_var = dict_create_var (mtf.dict, iter->in_name, 0);
1081 if (iter->in_var == NULL)
1083 msg (SE, _("IN variable name %s duplicates an "
1084 "existing variable name."),
1085 iter->in_var->name);
1088 iter->in_var->print = iter->in_var->write
1089 = make_output_format (FMT_F, 1, 0);
1092 /* MATCH FILES performs an n-way merge on all its input files.
1095 1. Read one input record from every input FILE.
1097 2. If no FILEs are left, stop. Otherwise, proceed to step 3.
1099 3. Find the FILE input record(s) that have minimum BY
1100 values. Store all the values from these input records into
1103 4. For every TABLE, read another record as long as the BY values
1104 on the TABLE's input record are less than the FILEs' BY values.
1105 If an exact match is found, store all the values from the TABLE
1106 input record into the output record.
1108 5. Write the output record.
1110 6. Read another record from each input file FILE and TABLE that
1111 we stored values from above. If we come to the end of one of the
1112 input files, remove it from the list of input files.
1114 7. Repeat from step 2.
1116 Unfortunately, this algorithm can't be implemented in a
1117 straightforward way because there's no function to read a
1118 record from the active file. Instead, it has to be written
1121 FIXME: For merging large numbers of files (more than 10?) a
1122 better algorithm would use a heap for finding minimum
1125 if (!used_active_file)
1126 discard_variables ();
1128 dict_compact_values (mtf.dict);
1129 mtf.output = casefile_create (dict_get_next_value_idx (mtf.dict));
1130 mtf.seq_nums = xcalloc (dict_get_var_cnt (mtf.dict), sizeof *mtf.seq_nums);
1131 case_create (&mtf.mtf_case, dict_get_next_value_idx (mtf.dict));
1133 if (!mtf_read_nonactive_records (&mtf))
1136 if (used_active_file)
1138 proc_set_sink (create_case_sink (&null_sink_class, default_dict, NULL));
1139 ok = procedure (mtf_processing, &mtf) && mtf_processing_finish (&mtf);
1142 ok = mtf_processing_finish (&mtf);
1144 discard_variables ();
1146 dict_destroy (default_dict);
1147 default_dict = mtf.dict;
1149 proc_set_source (storage_source_create (mtf.output));
1152 if (!mtf_free (&mtf))
1154 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1158 return CMD_CASCADING_FAILURE;
1161 /* Repeats 2...7 an arbitrary number of times. */
1163 mtf_processing_finish (void *mtf_)
1165 struct mtf_proc *mtf = mtf_;
1166 struct mtf_file *iter;
1168 /* Find the active file and delete it. */
1169 for (iter = mtf->head; iter; iter = iter->next)
1170 if (iter->handle == NULL)
1172 if (!mtf_delete_file_in_place (mtf, &iter))
1177 while (mtf->head && mtf->head->type == MTF_FILE)
1178 if (!mtf_processing (NULL, mtf))
1184 /* Return a string in a static buffer describing V's variable type and
1187 var_type_description (struct variable *v)
1189 static char buf[2][32];
1196 if (v->type == NUMERIC)
1197 strcpy (s, "numeric");
1200 assert (v->type == ALPHA);
1201 sprintf (s, "string with width %d", v->width);
1206 /* Closes FILE and frees its associated data.
1207 Returns true if successful, false if an I/O error
1208 occurred on FILE. */
1210 mtf_close_file (struct mtf_file *file)
1212 bool ok = file->reader == NULL || !any_reader_error (file->reader);
1214 any_reader_close (file->reader);
1215 if (file->handle != NULL)
1216 dict_destroy (file->dict);
1217 case_destroy (&file->input);
1218 free (file->in_name);
1223 /* Free all the data for the MATCH FILES procedure.
1224 Returns true if successful, false if an I/O error
1227 mtf_free (struct mtf_proc *mtf)
1229 struct mtf_file *iter, *next;
1232 for (iter = mtf->head; iter; iter = next)
1235 assert (iter->dict != mtf->dict);
1236 if (!mtf_close_file (iter))
1241 dict_destroy (mtf->dict);
1242 case_destroy (&mtf->mtf_case);
1243 free (mtf->seq_nums);
1248 /* Remove *FILE from the mtf_file chain. Make *FILE point to the next
1249 file in the chain, or to NULL if was the last in the chain.
1250 Returns true if successful, false if an I/O error occurred. */
1252 mtf_delete_file_in_place (struct mtf_proc *mtf, struct mtf_file **file)
1254 struct mtf_file *f = *file;
1258 f->prev->next = f->next;
1260 f->next->prev = f->prev;
1262 mtf->head = f->next;
1264 mtf->tail = f->prev;
1267 if (f->in_var != NULL)
1268 case_data_rw (&mtf->mtf_case, f->in_var->fv)->f = 0.;
1269 for (i = 0; i < dict_get_var_cnt (f->dict); i++)
1271 struct variable *v = dict_get_var (f->dict, i);
1272 struct variable *mv = get_master (v);
1275 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1277 if (v->type == NUMERIC)
1280 memset (out->s, ' ', v->width);
1284 return mtf_close_file (f);
1287 /* Read a record from every input file except the active file.
1288 Returns true if successful, false if an I/O error occurred. */
1290 mtf_read_nonactive_records (void *mtf_)
1292 struct mtf_proc *mtf = mtf_;
1293 struct mtf_file *iter, *next;
1296 for (iter = mtf->head; ok && iter != NULL; iter = next)
1299 if (iter->handle && !any_reader_read (iter->reader, &iter->input))
1300 if (!mtf_delete_file_in_place (mtf, &iter))
1306 /* Compare the BY variables for files A and B; return -1 if A < B, 0
1307 if A == B, 1 if A > B. */
1309 mtf_compare_BY_values (struct mtf_proc *mtf,
1310 struct mtf_file *a, struct mtf_file *b,
1311 const struct ccase *c)
1313 const struct ccase *ca = case_is_null (&a->input) ? c : &a->input;
1314 const struct ccase *cb = case_is_null (&b->input) ? c : &b->input;
1315 assert ((a == NULL) + (b == NULL) + (c == NULL) <= 1);
1316 return case_compare_2dict (ca, cb, a->by, b->by, mtf->by_cnt);
1319 /* Perform one iteration of steps 3...7 above.
   C is the current record of the active file; it stands in for
   any file whose own input record is null.
   mtf_processing_finish() passes a null C after it has removed
   the active file from the list.
   MTF_ is the struct mtf_proc for the command.
   Returns true if successful, false if an I/O error occurred. */
1322 mtf_processing (const struct ccase *c, void *mtf_)
1324 struct mtf_proc *mtf = mtf_;
1326 /* Do we need another record from the active file? */
1327 bool read_active_file;
1329 assert (mtf->head != NULL);
/* FILEs are kept ahead of TABLEs in the list, so a TABLE at the
   head means that no FILE remains. */
1330 if (mtf->head->type == MTF_TABLE)
1335 struct mtf_file *min_head, *min_tail; /* Files with minimum BY values. */
1336 struct mtf_file *max_head, *max_tail; /* Files with non-minimum BYs. */
1337 struct mtf_file *iter, *next;
1339 read_active_file = false;
1341 /* 3. Find the FILE input record(s) that have minimum BY
   values. Store all the values from these input records into
   the output record.  Both chains are threaded through the
   next_min members. */
1344 min_head = min_tail = mtf->head;
1345 max_head = max_tail = NULL;
1346 for (iter = mtf->head->next; iter && iter->type == MTF_FILE;
/* CMP orders the current minimum against ITER. */
1349 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1353 max_tail = max_tail->next_min = iter;
1355 max_head = max_tail = iter;
1358 min_tail = min_tail->next_min = iter;
1363 max_tail->next_min = min_head;
1364 max_tail = min_tail;
1368 max_head = min_head;
1369 max_tail = min_tail;
1371 min_head = min_tail = iter;
1375 /* 4. For every TABLE, read another record as long as the BY
   values on the TABLE's input record are less than the FILEs'
   BY values. If an exact match is found, store all the values
   from the TABLE input record into the output record. */
1379 for (; iter != NULL; iter = next)
1381 assert (iter->type == MTF_TABLE);
1386 int cmp = mtf_compare_BY_values (mtf, min_head, iter, c);
1390 max_tail = max_tail->next_min = iter;
1392 max_head = max_tail = iter;
1395 min_tail = min_tail->next_min = iter;
1398 if (iter->handle == NULL)
1400 if (any_reader_read (iter->reader, &iter->input))
1402 if (!mtf_delete_file_in_place (mtf, &iter))
1409 /* Next sequence number. */
1412 /* Store data to all the records we are using. */
1414 min_tail->next_min = NULL;
1415 for (iter = min_head; iter; iter = iter->next_min)
1419 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1421 struct variable *v = dict_get_var (iter->dict, i);
1422 struct variable *mv = get_master (v);
/* A master variable whose sequence number is already current has
   been stored for this output record; it is not overwritten by
   files later in the chain. */
1424 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1426 const struct ccase *record
1427 = case_is_null (&iter->input) ? c : &iter->input;
1428 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1430 mtf->seq_nums[mv->index] = mtf->seq_num;
1431 if (v->type == NUMERIC)
1432 out->f = case_num (record, v->fv);
1434 memcpy (out->s, case_str (record, v->fv), v->width);
/* The IN variable is 1 for a file that contributed to this
   record. */
1437 if (iter->in_var != NULL)
1438 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 1.;
/* The active file is the FILE that has no handle. */
1440 if (iter->type == MTF_FILE && iter->handle == NULL)
1441 read_active_file = true;
1444 /* Store missing values to all the records we're not
1447 max_tail->next_min = NULL;
1448 for (iter = max_head; iter; iter = iter->next_min)
1452 for (i = 0; i < dict_get_var_cnt (iter->dict); i++)
1454 struct variable *v = dict_get_var (iter->dict, i);
1455 struct variable *mv = get_master (v);
1457 if (mv != NULL && mtf->seq_nums[mv->index] != mtf->seq_num)
1459 union value *out = case_data_rw (&mtf->mtf_case, mv->fv);
1460 mtf->seq_nums[mv->index] = mtf->seq_num;
1462 if (v->type == NUMERIC)
1465 memset (out->s, ' ', v->width);
1468 if (iter->in_var != NULL)
1469 case_data_rw (&mtf->mtf_case, iter->in_var->fv)->f = 0.;
1472 /* 5. Write the output record. */
1473 casefile_append (mtf->output, &mtf->mtf_case);
1475 /* 6. Read another record from each input file FILE and TABLE
1476 that we stored values from above. If we come to the end of
1477 one of the input files, remove it from the list of input
1479 for (iter = min_head; iter && iter->type == MTF_FILE; iter = next)
1481 next = iter->next_min;
1482 if (iter->reader != NULL
1483 && !any_reader_read (iter->reader, &iter->input))
1484 if (!mtf_delete_file_in_place (mtf, &iter))
1488 while (!read_active_file
1489 && mtf->head != NULL && mtf->head->type == MTF_FILE);
1494 /* Merge the dictionary for file F into master dictionary M. */
1496 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1498 struct dictionary *d = f->dict;
1499 const char *d_docs, *m_docs;
1502 if (dict_get_label (m) == NULL)
1503 dict_set_label (m, dict_get_label (d));
1505 d_docs = dict_get_documents (d);
1506 m_docs = dict_get_documents (m);
1510 dict_set_documents (m, d_docs);
1516 new_len = strlen (m_docs) + strlen (d_docs);
1517 new_docs = xmalloc (new_len + 1);
1518 strcpy (new_docs, m_docs);
1519 strcat (new_docs, d_docs);
1520 dict_set_documents (m, new_docs);
1525 for (i = 0; i < dict_get_var_cnt (d); i++)
1527 struct variable *dv = dict_get_var (d, i);
1528 struct variable *mv = dict_lookup_var (m, dv->name);
1530 if (dict_class_from_id (dv->name) == DC_SCRATCH)
1535 if (mv->width != dv->width)
1537 msg (SE, _("Variable %s in file %s (%s) has different "
1538 "type or width from the same variable in "
1539 "earlier file (%s)."),
1540 dv->name, fh_get_name (f->handle),
1541 var_type_description (dv), var_type_description (mv));
1545 if (dv->width == mv->width)
1547 if (val_labs_count (dv->val_labs)
1548 && !val_labs_count (mv->val_labs))
1550 val_labs_destroy (mv->val_labs);
1551 mv->val_labs = val_labs_copy (dv->val_labs);
1553 if (!mv_is_empty (&dv->miss) && mv_is_empty (&mv->miss))
1554 mv_copy (&mv->miss, &dv->miss);
1557 if (dv->label && !mv->label)
1558 mv->label = xstrdup (dv->label);
1561 mv = dict_clone_var_assert (m, dv, dv->name);
/* Records MASTER as the master variable of V by attaching it as
   V's aux data, with no destructor. */
static void
set_master (struct variable *v, struct variable *master)
{
  var_attach_aux (v, master, NULL);
}
1574 /* Returns the master variable corresponding to V,
   as set with set_master(), which stores it as V's aux data
   through var_attach_aux(). */
1576 static struct variable *
1577 get_master (struct variable *v)
/* Case map.

   A case map copies data from a case that corresponds to one
   dictionary to a case that corresponds to a second dictionary
   derived from the first by, optionally, deleting, reordering,
   or renaming variables.  (No new variables may be created.) */
1595 size_t value_cnt; /* Number of values in map. */
1596 int *map; /* For each destination index, the
                corresponding source index, or -1 for a
                destination index that nothing maps to. */
1600 /* Prepares dictionary D for producing a case map. Afterward,
1601 the caller may delete, reorder, or rename variables within D
1602 at will before using finish_case_map() to produce the case
1605 Uses D's aux members, which must otherwise not be in use. */
1607 start_case_map (struct dictionary *d)
1609 size_t var_cnt = dict_get_var_cnt (d);
1612 for (i = 0; i < var_cnt; i++)
1614 struct variable *v = dict_get_var (d, i);
1615 int *src_fv = xmalloc (sizeof *src_fv);
1617 var_attach_aux (v, src_fv, var_dtor_free);
1621 /* Produces a case map from dictionary D, which must have been
1622 previously prepared with start_case_map().
1624 Does not retain any reference to D, and clears the aux members
1625 set up by start_case_map().
1627 Returns the new case map, or a null pointer if no mapping is
1628 required (that is, no data has changed position). */
1629 static struct case_map *
1630 finish_case_map (struct dictionary *d)
1632 struct case_map *map;
1633 size_t var_cnt = dict_get_var_cnt (d);
1637 map = xmalloc (sizeof *map);
1638 map->value_cnt = dict_get_next_value_idx (d);
1639 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1640 for (i = 0; i < map->value_cnt; i++)
1644 for (i = 0; i < var_cnt; i++)
1646 struct variable *v = dict_get_var (d, i);
1647 int *src_fv = (int *) var_detach_aux (v);
1650 if (v->fv != *src_fv)
1653 for (idx = 0; idx < v->nv; idx++)
1655 int src_idx = *src_fv + idx;
1656 int dst_idx = v->fv + idx;
1658 assert (map->map[dst_idx] == -1);
1659 map->map[dst_idx] = src_idx;
1666 destroy_case_map (map);
1670 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1676 /* Maps from SRC to DST, applying case map MAP. */
1678 map_case (const struct case_map *map,
1679 const struct ccase *src, struct ccase *dst)
1683 assert (map != NULL);
1684 assert (src != NULL);
1685 assert (dst != NULL);
1686 assert (src != dst);
1688 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1690 int src_idx = map->map[dst_idx];
1692 *case_data_rw (dst, dst_idx) = *case_data (src, src_idx);
1696 /* Destroys case map MAP. */
1698 destroy_case_map (struct case_map *map)