1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/case-map.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/format.h>
28 #include <data/dictionary.h>
29 #include <data/por-file-writer.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/sys-file-writer.h>
33 #include <data/transformations.h>
34 #include <data/value-labels.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <language/lexer/variable-parser.h>
40 #include <libpspp/assertion.h>
41 #include <libpspp/compiler.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/message.h>
44 #include <libpspp/misc.h>
45 #include <libpspp/str.h>
46 #include <libpspp/taint.h>
51 #define _(msgid) gettext (msgid)
53 static bool parse_dict_trim (struct lexer *, struct dictionary *);
55 /* Reading system and portable files. */
57 /* Type of command. */
64 static void get_translate_case (struct ccase *, struct ccase *, void *map_);
65 static bool get_destroy_case_map (void *map_);
67 /* Parses a GET or IMPORT command; TYPE says which one.
   Accepts FILE (the keyword may be omitted when a string token
   follows) and, for IMPORT only, TYPE=COMM or TYPE=TAPE, then
   dictionary-trimming subcommands up to the terminating period.
   The opened reader and its dictionary are handed to
   proc_set_active_file for DS.  The failure path destroys the
   reader and returns CMD_CASCADING_FAILURE.
   NOTE(review): the success return and the jumps to the failure
   path are not visible in this listing -- confirm against the
   complete source. */
69 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
71 struct casereader *reader = NULL;
72 struct file_handle *fh = NULL;
73 struct dictionary *dict = NULL;
74 struct case_map *map = NULL;
/* Leading subcommands: FILE and, for IMPORT, TYPE. */
78 lex_match (lexer, '/');
80 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
82 lex_match (lexer, '=');
85 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
89 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
91 lex_match (lexer, '=');
93 if (lex_match_id (lexer, "COMM"))
95 else if (lex_match_id (lexer, "TAPE"))
99 lex_error (lexer, _("expecting COMM or TAPE"));
/* Report a missing FILE subcommand. */
109 lex_sbc_missing (lexer, "FILE");
113 reader = any_reader_open (fh, &dict);
/* Presumably records the current layout of DICT so that
   case_map_from_dict below can describe what the trimming changed
   -- TODO confirm in data/case-map.h. */
117 case_map_prepare_dict (dict);
/* Everything up to the terminating period is handled by
   parse_dict_trim. */
119 while (lex_token (lexer) != '.')
121 lex_match (lexer, '/');
122 if (!parse_dict_trim (lexer, dict))
125 dict_compact_values (dict);
/* Wrap the reader in a translator that uses the case map and
   destroys it through get_destroy_case_map. */
127 map = case_map_from_dict (dict);
129 reader = casereader_create_translator (reader,
130 dict_get_next_value_idx (dict),
132 get_destroy_case_map,
135 proc_set_active_file (ds, reader, dict);
/* Failure path. */
142 casereader_destroy (reader);
145 return CMD_CASCADING_FAILURE;
/* Applies the case map passed as MAP_ to INPUT, storing the
   result in OUTPUT, and then destroys INPUT. */
149 get_translate_case (struct ccase *input, struct ccase *output,
152 struct case_map *map = map_;
153 case_map_execute (map, input, output);
154 case_destroy (input);
/* Destroys the case map passed as MAP_.  Used in this file as the
   destroy callback given to casereader_create_translator and
   casewriter_create_translator.  The prototype declares a bool
   result; the return statement is not visible in this listing. */
158 get_destroy_case_map (void *map_)
160 struct case_map *map = map_;
161 case_map_destroy (map);
/* GET command: delegates to parse_read_command with GET_CMD and
   returns its result. */
167 cmd_get (struct lexer *lexer, struct dataset *ds)
169 return parse_read_command (lexer, ds, GET_CMD);
/* IMPORT command: delegates to parse_read_command with IMPORT_CMD
   and returns its result. */
174 cmd_import (struct lexer *lexer, struct dataset *ds)
176 return parse_read_command (lexer, ds, IMPORT_CMD);
179 /* Writing system and portable files. */
181 /* Type of output file (the enum writer_type values taken by
   parse_write_command). */
184 SYSFILE_WRITER, /* System file. */
185 PORFILE_WRITER /* Portable file. */
188 /* Type of a command (the enum command_type values taken by
   parse_write_command). */
191 XFORM_CMD, /* Transformation. */
192 PROC_CMD /* Procedure. */
195 /* Parses a SAVE, XSAVE, EXPORT, or XEXPORT command.
   WRITER_TYPE identifies the type of file to write
   (SYSFILE_WRITER or PORFILE_WRITER), and COMMAND_TYPE
   identifies the type of command (XFORM_CMD or PROC_CMD).

   RETAIN_UNSELECTED must be nonnull exactly when COMMAND_TYPE
   is PROC_CMD.  In that case *RETAIN_UNSELECTED starts out true
   and is changed only by the UNSELECTED subcommand: RETAIN
   stores true and DELETE stores false.

   On success, returns a writer.
   On failure, returns a null pointer. */
205 static struct casewriter *
206 parse_write_command (struct lexer *lexer, struct dataset *ds,
207 enum writer_type writer_type,
208 enum command_type command_type,
209 bool *retain_unselected)
212 struct file_handle *handle; /* Output file. */
213 struct dictionary *dict; /* Dictionary for output file. */
214 struct casewriter *writer; /* Writer. */
215 struct case_map *map; /* Map from input data to data for writer. */
217 /* Common options. */
218 bool print_map; /* Print map? TODO. */
219 bool print_short_names; /* Print long-to-short name map. TODO. */
220 struct sfm_write_options sysfile_opts;
221 struct pfm_write_options porfile_opts;
223 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
224 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
225 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
227 if (command_type == PROC_CMD)
228 *retain_unselected = true;
/* Work on a private clone of the dataset dictionary so that the
   trimming subcommands do not alter the dataset itself. */
231 dict = dict_clone (dataset_dict (ds));
235 print_short_names = false;
236 sysfile_opts = sfm_writer_default_options ();
237 porfile_opts = pfm_writer_default_options ();
239 case_map_prepare_dict (dict);
240 dict_delete_scratch_vars (dict);
/* Subcommands.  Several are accepted only for one writer type or
   one command type; anything unrecognized falls through to
   parse_dict_trim. */
242 lex_match (lexer, '/');
245 if (lex_match_id (lexer, "OUTFILE"))
249 lex_sbc_only_once ("OUTFILE");
253 lex_match (lexer, '=');
255 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
259 else if (lex_match_id (lexer, "NAMES"))
260 print_short_names = true;
261 else if (lex_match_id (lexer, "PERMISSIONS"))
265 lex_match (lexer, '=');
266 if (lex_match_id (lexer, "READONLY"))
268 else if (lex_match_id (lexer, "WRITEABLE"))
272 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
/* The same permission setting is applied to both option sets. */
275 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
277 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
279 lex_match (lexer, '=');
280 if (lex_match_id (lexer, "RETAIN"))
281 *retain_unselected = true;
282 else if (lex_match_id (lexer, "DELETE"))
283 *retain_unselected = false;
286 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
290 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
291 sysfile_opts.compress = true;
292 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
293 sysfile_opts.compress = false;
294 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
296 lex_match (lexer, '=');
297 if (!lex_force_int (lexer))
299 sysfile_opts.version = lex_integer (lexer);
302 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
304 lex_match (lexer, '=');
305 if (lex_match_id (lexer, "COMMUNICATIONS"))
306 porfile_opts.type = PFM_COMM;
307 else if (lex_match_id (lexer, "TAPE"))
308 porfile_opts.type = PFM_TAPE;
311 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
315 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
317 lex_match (lexer, '=');
318 if (!lex_force_int (lexer))
320 porfile_opts.digits = lex_integer (lexer);
323 else if (!parse_dict_trim (lexer, dict))
326 if (!lex_match (lexer, '/'))
329 if (lex_end_of_command (lexer) != CMD_SUCCESS)
/* Report a missing OUTFILE subcommand. */
334 lex_sbc_missing (lexer, "OUTFILE");
338 dict_delete_scratch_vars (dict);
339 dict_compact_values (dict);
/* Handles that refer to a file get the system-file or
   portable-file writer; the any_writer_open call presumably serves
   the remaining handle kinds -- the selecting statements are not
   visible in this listing. */
341 if (fh_get_referent (handle) == FH_REF_FILE)
346 writer = sfm_open_writer (handle, dict, sysfile_opts);
349 writer = pfm_open_writer (handle, dict, porfile_opts);
354 writer = any_writer_open (handle, dict);
/* Wrap the writer in a translator that uses the case map and
   destroys it through get_destroy_case_map. */
358 map = case_map_from_dict (dict);
360 writer = casewriter_create_translator (writer,
361 case_map_get_value_cnt (map),
363 get_destroy_case_map,
/* Cleanup. */
372 casewriter_destroy (writer);
374 case_map_destroy (map);
378 /* SAVE and EXPORT. */
380 /* Parses and performs the SAVE or EXPORT procedure.
   WRITER_TYPE is passed through to parse_write_command.
   Returns CMD_SUCCESS if both the writer and the procedure
   finish without error, otherwise CMD_CASCADING_FAILURE. */
382 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
384 bool retain_unselected;
385 struct variable *saved_filter_variable;
386 struct casewriter *output;
389 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
392 return CMD_CASCADING_FAILURE;
/* When unselected cases are to be retained, clear the filter
   variable for the duration of the procedure; it is put back
   below. */
394 saved_filter_variable = dict_get_filter (dataset_dict (ds));
395 if (retain_unselected)
396 dict_set_filter (dataset_dict (ds), NULL);
398 casereader_transfer (proc_open (ds), output);
399 ok = casewriter_destroy (output);
/* proc_commit is the left operand so that it is called even when
   destroying the writer already reported failure. */
400 ok = proc_commit (ds) && ok;
402 dict_set_filter (dataset_dict (ds), saved_filter_variable);
404 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* SAVE command: runs parse_output_proc with SYSFILE_WRITER. */
408 cmd_save (struct lexer *lexer, struct dataset *ds)
410 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
/* EXPORT command: runs parse_output_proc with PORFILE_WRITER. */
414 cmd_export (struct lexer *lexer, struct dataset *ds)
416 return parse_output_proc (lexer, ds, PORFILE_WRITER);
419 /* XSAVE and XEXPORT. */
421 /* State of one XSAVE or XEXPORT transformation (struct
   output_trns, allocated in parse_output_trns). */
424 struct casewriter *writer; /* Writer that receives each case. */
427 static trns_proc_func output_trns_proc;
428 static trns_free_func output_trns_free;
430 /* Parses the XSAVE or XEXPORT transformation command.
   Allocates a struct output_trns, obtains its writer from
   parse_write_command with XFORM_CMD (so no RETAIN_UNSELECTED
   pointer is passed), and registers output_trns_proc and
   output_trns_free as a transformation on DS.  Returns
   CMD_CASCADING_FAILURE if no writer could be created. */
432 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
434 struct output_trns *t = xmalloc (sizeof *t);
435 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
436 if (t->writer == NULL)
439 return CMD_CASCADING_FAILURE;
442 add_transformation (ds, output_trns_proc, output_trns_free, t);
446 /* Writes a clone of case C to the writer of the XSAVE or XEXPORT
   transformation TRNS_.  C itself is left untouched.  Always
   returns TRNS_CONTINUE. */
448 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
450 struct output_trns *t = trns_;
452 case_clone (&tmp, c);
453 casewriter_write (t->writer, &tmp);
454 return TRNS_CONTINUE;
457 /* Frees the XSAVE or XEXPORT transformation TRNS_, destroying
   its writer.  Returns true if successful, false if an I/O error
   occurred, as reported by casewriter_destroy. */
460 output_trns_free (void *trns_)
462 struct output_trns *t = trns_;
463 bool ok = casewriter_destroy (t->writer);
/* XSAVE command: runs parse_output_trns with SYSFILE_WRITER. */
470 cmd_xsave (struct lexer *lexer, struct dataset *ds)
472 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
475 /* XEXPORT command: runs parse_output_trns with PORFILE_WRITER. */
477 cmd_xexport (struct lexer *lexer, struct dataset *ds)
479 return parse_output_trns (lexer, ds, PORFILE_WRITER);
482 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
483 static bool drop_variables (struct lexer *, struct dictionary *dict);
484 static bool keep_variables (struct lexer *, struct dictionary *dict);
486 /* Commands that read and write system files share a great deal
   of common syntactic structure for rearranging and dropping
   variables.  This function parses one such subcommand (MAP,
   DROP, KEEP, or RENAME) and modifies DICT accordingly.  DROP,
   KEEP, and RENAME are delegated to drop_variables,
   keep_variables, and rename_variables.  Any other token is
   reported with lex_error.  Returns true on success, false on
   failure. */
491 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
493 if (lex_match_id (lexer, "MAP"))
498 else if (lex_match_id (lexer, "DROP"))
499 return drop_variables (lexer, dict);
500 else if (lex_match_id (lexer, "KEEP"))
501 return keep_variables (lexer, dict);
502 else if (lex_match_id (lexer, "RENAME"))
503 return rename_variables (lexer, dict);
506 lex_error (lexer, _("expecting a valid subcommand"));
511 /* Parses and performs the RENAME subcommand of GET, SAVE, and
   MATCH FILES on DICT.  Two syntaxes are handled: a single
   unparenthesized OLD=NEW pair, and one or more parenthesized
   groups, each a list of old names, an equals sign, and a list of
   new names of the same length.  All parenthesized groups are
   collected first and applied with one dict_rename_vars call, so
   names may be exchanged among the variables being renamed. */
513 rename_variables (struct lexer *lexer, struct dictionary *dict)
526 lex_match (lexer, '=');
/* No opening parenthesis: single OLD=NEW form. */
527 if (lex_token (lexer) != '(')
531 v = parse_variable (lexer, dict);
534 if (!lex_force_match (lexer, '=')
535 || !lex_force_id (lexer))
/* The single form cannot rename onto a name already in DICT. */
537 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
539 msg (SE, _("Cannot rename %s as %s because there already exists "
540 "a variable named %s. To rename variables with "
541 "overlapping names, use a single RENAME subcommand "
542 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
543 "\"/RENAME (A B C=B C A)\"."),
544 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
548 dict_rename_var (dict, v, lex_tokid (lexer));
/* Parenthesized groups.  PV_APPEND makes each group extend the
   arrays built by the previous ones. */
557 while (lex_match (lexer, '('))
561 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
563 if (!lex_match (lexer, '='))
565 msg (SE, _("`=' expected after variable list."));
568 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
572 msg (SE, _("Number of variables on left side of `=' (%zu) does not "
573 "match number of variables on right side (%zu), in "
574 "parenthesized group %d of RENAME subcommand."),
575 nv - old_nv, nn - old_nv, group);
578 if (!lex_force_match (lexer, ')'))
583 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
585 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
/* Walks all NN collected new names; the loop body (presumably
   freeing each one) is not visible in this listing. */
591 for (i = 0; i < nn; i++)
599 /* Parses and performs the DROP subcommand of GET, SAVE, and
   MATCH FILES: deletes the listed variables from DICT.
   Dropping every variable is reported as an error.
   Returns true if successful, false on failure. */
602 drop_variables (struct lexer *lexer, struct dictionary *dict)
607 lex_match (lexer, '=');
608 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
610 dict_delete_vars (dict, v, nv);
613 if (dict_get_var_cnt (dict) == 0)
615 msg (SE, _("Cannot DROP all variables from dictionary."));
621 /* Parses and performs the KEEP subcommand of GET, SAVE, and
   MATCH FILES: the listed variables are moved to the front of
   DICT in the order given and all other variables are deleted.
   Returns true if successful, false on failure. */
624 keep_variables (struct lexer *lexer, struct dictionary *dict)
630 lex_match (lexer, '=');
631 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
634 /* Move the specified variables to the beginning. */
635 dict_reorder_vars (dict, v, nv);
637 /* Delete the remaining variables.  The array V is resized and
   reused to hold every variable that now follows the first NV. */
638 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
639 for (i = nv; i < dict_get_var_cnt (dict); i++)
640 v[i - nv] = dict_get_var (dict, i);
641 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* How a MATCH FILES input was specified; stored in the type member
   of struct mtf_file. */
652 MTF_FILE, /* Specified on FILE= subcommand. */
653 MTF_TABLE /* Specified on TABLE= subcommand. */
656 /* One of the FILEs or TABLEs on MATCH FILES.
   NOTE(review): members used elsewhere in this file (type,
   sequence) are not visible in this listing. */
659 struct ll ll; /* In list of all files and tables. */
664 const struct variable **by; /* List of BY variables for this file. */
665 struct mtf_variable *vars; /* Variables to copy to output. */
666 size_t var_cnt; /* Number of entries used in vars. */
668 struct file_handle *handle; /* Input file handle (tested for null before use). */
669 struct dictionary *dict; /* Input file dictionary. */
670 struct casereader *reader; /* Input reader. */
671 struct ccase input; /* Input record (null at end of file). */
674 char *in_name; /* Name given on IN subcommand, or null. */
675 struct variable *in_var; /* IN variable (in master dictionary). */
/* One variable copied from an input file to the MATCH FILES output
   (struct mtf_variable; see the vars member of struct mtf_file). */
680 struct variable *in_var; /* Variable in the input file dictionary. */
681 struct variable *out_var; /* Same-named variable in the output dictionary. */
684 /* MATCH FILES procedure state (struct mtf_proc). */
687 struct ll_list files; /* List of "struct mtf_file"s. */
688 int nonempty_files; /* FILEs that are not at end-of-file. */
690 bool ok; /* False if I/O error occurs. */
692 struct dictionary *dict; /* Dictionary of output file. */
693 struct casewriter *output; /* MATCH FILES output. */
695 size_t by_cnt; /* Number of variables on BY subcommand. */
/* NOTE(review): the next line is the middle of a comment whose
   opening and closing lines are missing from this listing. */
698 Only if "first" or "last" is nonnull are the remaining
700 struct variable *first; /* Variable specified on FIRST (if any). */
701 struct variable *last; /* Variable specified on LAST (if any). */
702 struct ccase buffered_case; /* Case ready for output except that we don't
703 know the value for the LAST variable yet. */
704 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
705 const struct variable **prev_BY; /* Last set of BY variables. */
708 static void mtf_free (struct mtf_proc *);
710 static bool mtf_close_all_files (struct mtf_proc *);
711 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
712 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
714 static void mtf_process_case (struct mtf_proc *);
716 static bool create_flag_var (const char *subcommand_name, const char *var_name,
717 struct dictionary *, struct variable **);
718 static char *var_type_description (struct variable *);
720 /* Parses and executes the MATCH FILES command.
   Phases, in order: parse the FILE and TABLE specifications
   (merging each input dictionary into the output dictionary),
   parse the remaining subcommands (BY, FIRST, LAST, MAP, DROP,
   KEEP), map input variables to output variables, create the IN,
   FIRST, and LAST flag variables, open the readers, merge the
   cases, and install the result as the active file of DS.
   Returns CMD_SUCCESS or CMD_CASCADING_FAILURE. */
722 cmd_match_files (struct lexer *lexer, struct dataset *ds)
725 struct ll *first_table;
726 struct mtf_file *file, *next;
729 struct casereader *active_file = NULL;
/* An empty name means the subcommand has not been given. */
731 char first_name[VAR_NAME_LEN + 1] = "";
732 char last_name[VAR_NAME_LEN + 1] = "";
734 struct taint *taint = NULL;
738 ll_init (&mtf.files);
739 mtf.nonempty_files = 0;
740 first_table = ll_null (&mtf.files);
741 mtf.dict = dict_create ();
744 mtf.first = mtf.last = NULL;
745 case_nullify (&mtf.buffered_case);
746 case_nullify (&mtf.prev_BY_case);
749 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
/* FILE and TABLE specifications. */
751 lex_match (lexer, '/');
752 while (lex_token (lexer) == T_ID
753 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
754 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
756 struct mtf_file *file = xmalloc (sizeof *file);
761 file->in_name = NULL;
765 case_nullify (&file->input);
/* FILEs are inserted ahead of the first TABLE and TABLEs are
   appended, so every FILE precedes every TABLE in the list. */
767 if (lex_match_id (lexer, "FILE"))
769 file->type = MTF_FILE;
770 ll_insert (first_table, &file->ll);
771 mtf.nonempty_files++;
773 else if (lex_match_id (lexer, "TABLE"))
775 file->type = MTF_TABLE;
776 ll_push_tail (&mtf.files, &file->ll);
777 if (first_table == ll_null (&mtf.files))
778 first_table = &file->ll;
782 lex_match (lexer, '=');
/* An asterisk names the active file; anything else is parsed as
   a file handle and opened immediately. */
784 if (lex_match (lexer, '*'))
786 if (!proc_has_active_file (ds))
788 msg (SE, _("Cannot specify the active file since no active "
789 "file has been defined."));
793 if (proc_make_temporary_transformations_permanent (ds))
795 _("MATCH FILES may not be used after TEMPORARY when "
796 "the active file is an input source. "
797 "Temporary transformations will be made permanent."));
799 file->dict = dict_clone (dataset_dict (ds));
803 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
804 if (file->handle == NULL)
807 file->reader = any_reader_open (file->handle, &file->dict);
808 if (file->reader == NULL)
/* Per-input subcommands: RENAME and IN. */
812 while (lex_match (lexer, '/'))
813 if (lex_match_id (lexer, "RENAME"))
815 if (!rename_variables (lexer, file->dict))
818 else if (lex_match_id (lexer, "IN"))
820 lex_match (lexer, '=');
821 if (lex_token (lexer) != T_ID)
823 lex_error (lexer, NULL);
827 if (file->in_name != NULL)
829 msg (SE, _("Multiple IN subcommands for a single FILE or "
833 file->in_name = xstrdup (lex_tokid (lexer));
838 mtf_merge_dictionary (mtf.dict, file);
/* Subcommands that apply to the command as a whole. */
841 while (lex_token (lexer) != '.')
843 if (lex_match (lexer, T_BY))
845 struct mtf_file *file;
846 struct variable **by;
851 lex_sbc_only_once ("BY");
855 lex_match (lexer, '=');
856 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
857 PV_NO_DUPLICATE | PV_NO_SCRATCH))
/* Resolve each BY variable by name in every input dictionary. */
861 ll_for_each (file, struct mtf_file, ll, &mtf.files)
865 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
866 for (i = 0; i < mtf.by_cnt; i++)
868 const char *var_name = var_get_name (by[i]);
869 file->by[i] = dict_lookup_var (file->dict, var_name);
870 if (file->by[i] == NULL)
872 if (file->handle != NULL)
873 msg (SE, _("File %s lacks BY variable %s."),
874 fh_get_name (file->handle), var_name);
876 msg (SE, _("Active file lacks BY variable %s."),
887 else if (lex_match_id (lexer, "FIRST"))
889 if (first_name[0] != '\0')
891 lex_sbc_only_once ("FIRST");
895 lex_match (lexer, '=');
896 if (!lex_force_id (lexer))
898 strcpy (first_name, lex_tokid (lexer));
901 else if (lex_match_id (lexer, "LAST"))
903 if (last_name[0] != '\0')
905 lex_sbc_only_once ("LAST");
909 lex_match (lexer, '=');
910 if (!lex_force_id (lexer))
912 strcpy (last_name, lex_tokid (lexer));
915 else if (lex_match_id (lexer, "MAP"))
919 else if (lex_match_id (lexer, "DROP"))
921 if (!drop_variables (lexer, mtf.dict))
924 else if (lex_match_id (lexer, "KEEP"))
926 if (!keep_variables (lexer, mtf.dict))
931 lex_error (lexer, NULL);
935 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
937 lex_end_of_command (lexer);
/* BY is mandatory when any TABLE or any IN was given. */
944 if (first_table != ll_null (&mtf.files))
946 msg (SE, _("BY is required when TABLE is specified."));
951 msg (SE, _("BY is required when IN is specified."));
956 /* Set up mapping from each file's variables to master
958 ll_for_each (file, struct mtf_file, ll, &mtf.files)
960 size_t in_var_cnt = dict_get_var_cnt (file->dict);
962 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
964 for (i = 0; i < in_var_cnt; i++)
966 struct variable *in_var = dict_get_var (file->dict, i);
967 struct variable *out_var = dict_lookup_var (mtf.dict,
968 var_get_name (in_var));
972 struct mtf_variable *mv = &file->vars[file->var_cnt++];
974 mv->out_var = out_var;
979 /* Add IN, FIRST, and LAST variables to master dictionary. */
980 ll_for_each (file, struct mtf_file, ll, &mtf.files)
981 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
983 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
984 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
987 dict_delete_scratch_vars (mtf.dict);
988 dict_compact_values (mtf.dict);
989 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
/* TAINT collects the taint of the output writer and of every input
   reader; its final state decides the command result. */
990 taint = taint_clone (casewriter_get_taint (mtf.output));
/* Inputs without a reader refer to the active file.  The first one
   opens it; later ones share it through clones. */
992 ll_for_each (file, struct mtf_file, ll, &mtf.files)
994 if (file->reader == NULL)
996 if (active_file == NULL)
998 proc_discard_output (ds);
999 file->reader = active_file = proc_open (ds);
1002 file->reader = casereader_clone (active_file);
1004 taint_propagate (casereader_get_taint (file->reader), taint);
/* Prime every input with its first record, then merge until all
   FILEs are exhausted. */
1007 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1008 mtf_read_record (&mtf, file);
1009 while (mtf.nonempty_files > 0)
1010 mtf_process_case (&mtf);
/* With FIRST or LAST in use one case is still buffered; it is
   the last of its BY group, so LAST is set to 1. */
1011 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1013 if (mtf.last != NULL)
1014 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1015 casewriter_write (mtf.output, &mtf.buffered_case);
1016 case_nullify (&mtf.buffered_case);
1018 mtf_close_all_files (&mtf);
1019 if (active_file != NULL)
1022 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1028 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Failure path. */
1031 if (active_file != NULL)
1034 taint_destroy (taint);
1035 return CMD_CASCADING_FAILURE;
1038 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1039 attempts to create a variable named VAR_NAME, with format
1040 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1041 Returns true if successful, false if the variable name is a
1042 duplicate (in which case a message saying that the variable
1043 specified on the given SUBCOMMAND is a duplicate is emitted).
1044 Also returns true, without doing anything, if VAR_NAME is null
1047 create_flag_var (const char *subcommand, const char *var_name,
1048 struct dictionary *dict, struct variable **var)
1050 if (var_name != NULL && var_name[0] != '\0')
1052 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1053 *var = dict_create_var (dict, var_name, 0);
1056 msg (SE, _("Variable name %s specified on %s subcommand "
1057 "duplicates an existing variable name."),
1058 subcommand, var_name);
1061 var_set_both_formats (*var, &format);
/* Describes the type of V in a newly allocated string: the text
   numeric for a numeric variable, otherwise the text string with
   width N, where N comes from var_get_width.  The callers in this
   file pass the result to free.
   NOTE(review): the original comment below has lost its closing
   line in this listing. */
1068 /* Return a string in an allocated buffer describing V's variable
1071 var_type_description (struct variable *v)
1073 if (var_is_numeric (v))
1074 return xstrdup ("numeric");
1076 return xasprintf ("string with width %d", var_get_width (v));
1079 /* Closes all the files in MTF and frees their associated data:
   each entry is removed from the list and its handle reference,
   reader, dictionary, IN name, and input case are released.
   Returns true if successful, false if an I/O error occurred on
   any of the files. */
1083 mtf_close_all_files (struct mtf_proc *mtf)
1085 struct mtf_file *file;
1088 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1090 fh_unref (file->handle);
1091 casereader_destroy (file->reader);
1093 dict_destroy (file->dict);
1094 free (file->in_name);
1095 case_destroy (&file->input);
1103 /* Frees all the data for the MATCH FILES procedure MTF: the
   input files, the output dictionary, the output writer, and the
   two cases kept for FIRST and LAST processing. */
1105 mtf_free (struct mtf_proc *mtf)
1107 mtf_close_all_files (mtf);
1108 dict_destroy (mtf->dict);
1109 casewriter_destroy (mtf->output);
1110 case_destroy (&mtf->buffered_case);
1111 case_destroy (&mtf->prev_BY_case);
1114 /* Discards the current record of FILE and reads the next one
   into it, if possible.  When the read fails, the nonempty_files
   count of MTF is decremented.  The prototype declares a bool
   result, which mtf_process_case uses as a loop condition.
   NOTE(review): only FILE entries increment nonempty_files when
   they are added, so the decrement is presumably limited to them;
   that guard is not visible in this listing. */
1117 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
1119 case_destroy (&file->input);
1120 if (!casereader_read (file->reader, &file->input))
1122 mtf->nonempty_files--;
1129 /* Compares the BY variables of the current records of files A
   and B by passing both input cases and both BY lists to
   case_compare_2dict, and returns its result.  Callers rely on
   the sign only: negative if A < B, 0 if A == B, positive if
   A > B.  (If there are no BY variables, then all records are
   equal.) */
1133 mtf_compare_BY_values (struct mtf_proc *mtf,
1134 struct mtf_file *a, struct mtf_file *b)
1136 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1139 /* Processes the input files and writes one case to the output.
   Steps: mark the inputs that take part in this case through
   their sequence members, build the output case from them, write
   it (or buffer it when FIRST or LAST is in use), and advance the
   FILEs that took part. */
1141 mtf_process_case (struct mtf_proc *mtf)
1144 struct mtf_file *min;
1145 struct mtf_file *file;
1149 /* Find the set of one or more FILEs whose BY values are
1150 minimal, as well as the set of zero or more TABLEs whose BY
1151 values equal those of the minimum FILEs.
1153 After each iteration of the loop, this invariant holds: the
1154 FILEs with minimum BY values thus far have "sequence"
1155 members equal to min_sequence, and "min" points to one of
1156 the mtf_files whose case has those minimum BY values, and
1157 similarly for TABLEs. */
1160 ll_for_each (file, struct mtf_file, ll, &mtf->files)
/* Inputs at end of file never take part. */
1161 if (case_is_null (&file->input))
1162 file->sequence = -1;
1163 else if (file->type == MTF_FILE)
/* With no minimum yet, cmp is forced to 1 so this FILE becomes
   the minimum. */
1165 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1167 file->sequence = cmp < 0 ? -1 : min_sequence;
/* Bumping min_sequence drops every FILE marked earlier. */
1170 file->sequence = ++min_sequence;
1177 assert (min != NULL);
/* TABLE branch: skip table records that sort before the minimum. */
1180 cmp = mtf_compare_BY_values (mtf, min, file);
1182 while (cmp > 0 && mtf_read_record (mtf, file));
1183 file->sequence = cmp == 0 ? min_sequence : -1;
1186 /* Form the output case from the input cases. */
1187 case_create (&c, dict_get_next_value_idx (mtf->dict));
/* Start with every output variable missing. */
1188 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1190 struct variable *v = dict_get_var (mtf->dict, i);
1191 value_set_missing (case_data_rw (&c, v), var_get_width (v));
/* Reverse list order: for a variable present in several inputs
   the copy from the earliest input is made last, so presumably
   that value is the one kept. */
1193 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1195 bool include_file = file->sequence == min_sequence;
1197 for (i = 0; i < file->var_cnt; i++)
1199 const struct mtf_variable *mv = &file->vars[i];
1200 const union value *in = case_data (&file->input, mv->in_var);
1201 union value *out = case_data_rw (&c, mv->out_var);
1202 value_copy (out, in, var_get_width (mv->in_var));
/* The IN flag records whether this input contributed. */
1204 if (file->in_var != NULL)
1205 case_data_rw (&c, file->in_var)->f = include_file;
1208 /* Write the output case. */
1209 if (mtf->first == NULL && mtf->last == NULL)
1211 /* With no FIRST or LAST variables, it's trivial. */
1212 casewriter_write (mtf->output, &c);
1216 /* It's harder with LAST, because we can't know whether
1217 this case is the last in a group until we've prepared
1218 the *next* case also. Thus, we buffer the previous
1219 output case until the next one is ready.
1221 We also have to save a copy of one of the previous input
1222 cases, so that we can compare the BY variables. We
1223 can't compare the BY variables between the current
1224 output case and the saved one because the BY variables
1225 might not be in the output (the user is allowed to drop
1228 if (mtf->prev_BY != NULL)
1230 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1231 min->by, mtf->prev_BY,
1233 if (mtf->last != NULL)
1234 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1235 casewriter_write (mtf->output, &mtf->buffered_case);
1240 case_move (&mtf->buffered_case, &c);
1241 if (mtf->first != NULL)
1242 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1246 mtf->prev_BY = min->by;
1247 case_destroy (&mtf->prev_BY_case);
1248 case_clone (&mtf->prev_BY_case, &min->input);
1252 /* Read another record from each input file FILE with minimum
1254 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1255 if (file->type == MTF_FILE)
1257 if (file->sequence == min_sequence)
1258 mtf_read_record (mtf, file);
1264 /* Merge the dictionary for file F into master dictionary M. */
1266 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1268 struct dictionary *d = f->dict;
1269 const char *d_docs, *m_docs;
1272 if (dict_get_label (m) == NULL)
1273 dict_set_label (m, dict_get_label (d));
1275 d_docs = dict_get_documents (d);
1276 m_docs = dict_get_documents (m);
1280 dict_set_documents (m, d_docs);
1283 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1284 dict_set_documents (m, new_docs);
1289 for (i = 0; i < dict_get_var_cnt (d); i++)
1291 struct variable *dv = dict_get_var (d, i);
1292 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1294 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1299 if (var_get_width (mv) != var_get_width (dv))
1301 char *dv_description = var_type_description (dv);
1302 char *mv_description = var_type_description (mv);
1303 msg (SE, _("Variable %s in file %s (%s) has different "
1304 "type or width from the same variable in "
1305 "earlier file (%s)."),
1306 var_get_name (dv), fh_get_name (f->handle),
1307 dv_description, mv_description);
1308 free (dv_description);
1309 free (mv_description);
1313 if (var_get_width (dv) == var_get_width (mv))
1315 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1316 var_set_value_labels (mv, var_get_value_labels (dv));
1317 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1318 var_set_missing_values (mv, var_get_missing_values (dv));
1321 if (var_get_label (dv) && !var_get_label (mv))
1322 var_set_label (mv, var_get_label (dv));
1325 mv = dict_clone_var_assert (m, dv, var_get_name (dv));