1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/case-map.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/format.h>
28 #include <data/dictionary.h>
29 #include <data/por-file-writer.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/sys-file-writer.h>
33 #include <data/transformations.h>
34 #include <data/value-labels.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <language/lexer/variable-parser.h>
40 #include <libpspp/assertion.h>
41 #include <libpspp/compiler.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/message.h>
44 #include <libpspp/misc.h>
45 #include <libpspp/str.h>
46 #include <libpspp/taint.h>
53 #define _(msgid) gettext (msgid)
55 static bool parse_dict_trim (struct lexer *, struct dictionary *);
57 /* Reading system and portable files. */
59 /* Type of command. */
66 static void get_translate_case (struct ccase *, struct ccase *, void *map_);
67 static bool get_destroy_case_map (void *map_);
69 /* Parses a GET or IMPORT command. */
71 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
73 struct casereader *reader = NULL;
74 struct file_handle *fh = NULL;
75 struct dictionary *dict = NULL;
76 struct case_map *map = NULL;
78 if ( type == GET_CMD && lex_match_id (lexer, "DATA") )
80 return parse_get_data_command (lexer, ds);
85 lex_match (lexer, '/');
87 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
89 lex_match (lexer, '=');
92 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
96 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
98 lex_match (lexer, '=');
100 if (lex_match_id (lexer, "COMM"))
102 else if (lex_match_id (lexer, "TAPE"))
106 lex_error (lexer, _("expecting COMM or TAPE"));
116 lex_sbc_missing (lexer, "FILE");
120 reader = any_reader_open (fh, &dict);
124 case_map_prepare_dict (dict);
126 while (lex_token (lexer) != '.')
128 lex_match (lexer, '/');
129 if (!parse_dict_trim (lexer, dict))
132 dict_compact_values (dict);
134 map = case_map_from_dict (dict);
136 reader = casereader_create_translator (reader,
137 dict_get_next_value_idx (dict),
139 get_destroy_case_map,
142 proc_set_active_file (ds, reader, dict);
149 casereader_destroy (reader);
152 return CMD_CASCADING_FAILURE;
156 get_translate_case (struct ccase *input, struct ccase *output,
159 struct case_map *map = map_;
160 case_map_execute (map, input, output);
161 case_destroy (input);
165 get_destroy_case_map (void *map_)
167 struct case_map *map = map_;
168 case_map_destroy (map);
174 cmd_get (struct lexer *lexer, struct dataset *ds)
176 return parse_read_command (lexer, ds, GET_CMD);
181 cmd_import (struct lexer *lexer, struct dataset *ds)
183 return parse_read_command (lexer, ds, IMPORT_CMD);
186 /* Writing system and portable files. */
188 /* Type of output file. */
191 SYSFILE_WRITER, /* System file. */
192 PORFILE_WRITER /* Portable file. */
195 /* Type of a command. */
198 XFORM_CMD, /* Transformation. */
199 PROC_CMD /* Procedure. */
202 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
203 WRITER_TYPE identifies the type of file to write,
204 and COMMAND_TYPE identifies the type of command.
206 On success, returns a writer.
207 For procedures only, sets *RETAIN_UNSELECTED to true if cases
208 that would otherwise be excluded by FILTER or USE should be
211 On failure, returns a null pointer. */
212 static struct casewriter *
213 parse_write_command (struct lexer *lexer, struct dataset *ds,
214 enum writer_type writer_type,
215 enum command_type command_type,
216 bool *retain_unselected)
219 struct file_handle *handle; /* Output file. */
220 struct dictionary *dict; /* Dictionary for output file. */
221 struct casewriter *writer; /* Writer. */
222 struct case_map *map; /* Map from input data to data for writer. */
224 /* Common options. */
225 bool print_map; /* Print map? TODO. */
226 bool print_short_names; /* Print long-to-short name map. TODO. */
227 struct sfm_write_options sysfile_opts;
228 struct pfm_write_options porfile_opts;
230 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
231 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
232 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
234 if (command_type == PROC_CMD)
235 *retain_unselected = true;
238 dict = dict_clone (dataset_dict (ds));
242 print_short_names = false;
243 sysfile_opts = sfm_writer_default_options ();
244 porfile_opts = pfm_writer_default_options ();
246 case_map_prepare_dict (dict);
247 dict_delete_scratch_vars (dict);
249 lex_match (lexer, '/');
252 if (lex_match_id (lexer, "OUTFILE"))
256 lex_sbc_only_once ("OUTFILE");
260 lex_match (lexer, '=');
262 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
266 else if (lex_match_id (lexer, "NAMES"))
267 print_short_names = true;
268 else if (lex_match_id (lexer, "PERMISSIONS"))
272 lex_match (lexer, '=');
273 if (lex_match_id (lexer, "READONLY"))
275 else if (lex_match_id (lexer, "WRITEABLE"))
279 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
282 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
284 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
286 lex_match (lexer, '=');
287 if (lex_match_id (lexer, "RETAIN"))
288 *retain_unselected = true;
289 else if (lex_match_id (lexer, "DELETE"))
290 *retain_unselected = false;
293 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
297 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
298 sysfile_opts.compress = true;
299 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
300 sysfile_opts.compress = false;
301 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
303 lex_match (lexer, '=');
304 if (!lex_force_int (lexer))
306 sysfile_opts.version = lex_integer (lexer);
309 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
311 lex_match (lexer, '=');
312 if (lex_match_id (lexer, "COMMUNICATIONS"))
313 porfile_opts.type = PFM_COMM;
314 else if (lex_match_id (lexer, "TAPE"))
315 porfile_opts.type = PFM_TAPE;
318 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
322 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
324 lex_match (lexer, '=');
325 if (!lex_force_int (lexer))
327 porfile_opts.digits = lex_integer (lexer);
330 else if (!parse_dict_trim (lexer, dict))
333 if (!lex_match (lexer, '/'))
336 if (lex_end_of_command (lexer) != CMD_SUCCESS)
341 lex_sbc_missing (lexer, "OUTFILE");
345 dict_delete_scratch_vars (dict);
346 dict_compact_values (dict);
348 if (fh_get_referent (handle) == FH_REF_FILE)
353 writer = sfm_open_writer (handle, dict, sysfile_opts);
356 writer = pfm_open_writer (handle, dict, porfile_opts);
361 writer = any_writer_open (handle, dict);
365 map = case_map_from_dict (dict);
367 writer = casewriter_create_translator (writer,
368 case_map_get_value_cnt (map),
370 get_destroy_case_map,
379 casewriter_destroy (writer);
381 case_map_destroy (map);
385 /* SAVE and EXPORT. */
387 /* Parses and performs the SAVE or EXPORT procedure. */
389 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
391 bool retain_unselected;
392 struct variable *saved_filter_variable;
393 struct casewriter *output;
396 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
399 return CMD_CASCADING_FAILURE;
401 saved_filter_variable = dict_get_filter (dataset_dict (ds));
402 if (retain_unselected)
403 dict_set_filter (dataset_dict (ds), NULL);
405 casereader_transfer (proc_open (ds), output);
406 ok = casewriter_destroy (output);
407 ok = proc_commit (ds) && ok;
409 dict_set_filter (dataset_dict (ds), saved_filter_variable);
411 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
415 cmd_save (struct lexer *lexer, struct dataset *ds)
417 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
421 cmd_export (struct lexer *lexer, struct dataset *ds)
423 return parse_output_proc (lexer, ds, PORFILE_WRITER);
426 /* XSAVE and XEXPORT. */
428 /* Transformation. */
431 struct casewriter *writer; /* Writer. */
434 static trns_proc_func output_trns_proc;
435 static trns_free_func output_trns_free;
437 /* Parses the XSAVE or XEXPORT transformation command. */
439 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
441 struct output_trns *t = xmalloc (sizeof *t);
442 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
443 if (t->writer == NULL)
446 return CMD_CASCADING_FAILURE;
449 add_transformation (ds, output_trns_proc, output_trns_free, t);
453 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
455 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
457 struct output_trns *t = trns_;
459 case_clone (&tmp, c);
460 casewriter_write (t->writer, &tmp);
461 return TRNS_CONTINUE;
464 /* Frees an XSAVE or XEXPORT transformation.
465 Returns true if successful, false if an I/O error occurred. */
467 output_trns_free (void *trns_)
469 struct output_trns *t = trns_;
470 bool ok = casewriter_destroy (t->writer);
477 cmd_xsave (struct lexer *lexer, struct dataset *ds)
479 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
482 /* XEXPORT command. */
484 cmd_xexport (struct lexer *lexer, struct dataset *ds)
486 return parse_output_trns (lexer, ds, PORFILE_WRITER);
489 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
490 static bool drop_variables (struct lexer *, struct dictionary *dict);
491 static bool keep_variables (struct lexer *, struct dictionary *dict);
493 /* Commands that read and write system files share a great deal
494 of common syntactic structure for rearranging and dropping
495 variables. This function parses this syntax and modifies DICT
496 appropriately. Returns true on success, false on failure. */
498 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
500 if (lex_match_id (lexer, "MAP"))
505 else if (lex_match_id (lexer, "DROP"))
506 return drop_variables (lexer, dict);
507 else if (lex_match_id (lexer, "KEEP"))
508 return keep_variables (lexer, dict);
509 else if (lex_match_id (lexer, "RENAME"))
510 return rename_variables (lexer, dict);
513 lex_error (lexer, _("expecting a valid subcommand"));
518 /* Parses and performs the RENAME subcommand of GET and SAVE. */
520 rename_variables (struct lexer *lexer, struct dictionary *dict)
533 lex_match (lexer, '=');
534 if (lex_token (lexer) != '(')
538 v = parse_variable (lexer, dict);
541 if (!lex_force_match (lexer, '=')
542 || !lex_force_id (lexer))
544 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
546 msg (SE, _("Cannot rename %s as %s because there already exists "
547 "a variable named %s. To rename variables with "
548 "overlapping names, use a single RENAME subcommand "
549 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
550 "\"/RENAME (A B C=B C A)\"."),
551 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
555 dict_rename_var (dict, v, lex_tokid (lexer));
564 while (lex_match (lexer, '('))
568 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
570 if (!lex_match (lexer, '='))
572 msg (SE, _("`=' expected after variable list."));
575 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
579 msg (SE, _("Number of variables on left side of `=' (%zu) does not "
580 "match number of variables on right side (%zu), in "
581 "parenthesized group %d of RENAME subcommand."),
582 nv - old_nv, nn - old_nv, group);
585 if (!lex_force_match (lexer, ')'))
590 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
592 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
598 for (i = 0; i < nn; i++)
606 /* Parses and performs the DROP subcommand of GET and SAVE.
607 Returns true if successful, false on failure.*/
609 drop_variables (struct lexer *lexer, struct dictionary *dict)
614 lex_match (lexer, '=');
615 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
617 dict_delete_vars (dict, v, nv);
620 if (dict_get_var_cnt (dict) == 0)
622 msg (SE, _("Cannot DROP all variables from dictionary."));
628 /* Parses and performs the KEEP subcommand of GET and SAVE.
629 Returns true if successful, false on failure.*/
631 keep_variables (struct lexer *lexer, struct dictionary *dict)
637 lex_match (lexer, '=');
638 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
641 /* Move the specified variables to the beginning. */
642 dict_reorder_vars (dict, v, nv);
644 /* Delete the remaining variables. */
645 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
646 for (i = nv; i < dict_get_var_cnt (dict); i++)
647 v[i - nv] = dict_get_var (dict, i);
648 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
659 MTF_FILE, /* Specified on FILE= subcommand. */
660 MTF_TABLE /* Specified on TABLE= subcommand. */
663 /* One of the FILEs or TABLEs on MATCH FILES. */
666 struct ll ll; /* In list of all files and tables. */
671 const struct variable **by; /* List of BY variables for this file. */
672 struct mtf_variable *vars; /* Variables to copy to output. */
673 size_t var_cnt; /* Number of other variables. */
675 struct file_handle *handle; /* Input file handle. */
676 struct dictionary *dict; /* Input file dictionary. */
677 struct casereader *reader; /* Input reader. */
678 struct ccase input; /* Input record (null at end of file). */
681 char *in_name; /* Variable name. */
682 struct variable *in_var; /* Variable (in master dictionary). */
687 struct variable *in_var;
688 struct variable *out_var;
691 /* MATCH FILES procedure. */
694 struct ll_list files; /* List of "struct mtf_file"s. */
695 int nonempty_files; /* FILEs that are not at end-of-file. */
697 bool ok; /* False if I/O error occurs. */
699 struct dictionary *dict; /* Dictionary of output file. */
700 struct casewriter *output; /* MATCH FILES output. */
702 size_t by_cnt; /* Number of variables on BY subcommand. */
705 Only if "first" or "last" is nonnull are the remaining
707 struct variable *first; /* Variable specified on FIRST (if any). */
708 struct variable *last; /* Variable specified on LAST (if any). */
709 struct ccase buffered_case; /* Case ready for output except that we don't
710 know the value for the LAST variable yet. */
711 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
712 const struct variable **prev_BY; /* Last set of BY variables. */
715 static void mtf_free (struct mtf_proc *);
717 static bool mtf_close_all_files (struct mtf_proc *);
718 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
719 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
721 static void mtf_process_case (struct mtf_proc *);
723 static bool create_flag_var (const char *subcommand_name, const char *var_name,
724 struct dictionary *, struct variable **);
725 static char *var_type_description (struct variable *);
727 /* Parse and execute the MATCH FILES command. */
729 cmd_match_files (struct lexer *lexer, struct dataset *ds)
732 struct ll *first_table;
733 struct mtf_file *file, *next;
736 struct casereader *active_file = NULL;
738 char first_name[VAR_NAME_LEN + 1] = "";
739 char last_name[VAR_NAME_LEN + 1] = "";
741 struct taint *taint = NULL;
745 ll_init (&mtf.files);
746 mtf.nonempty_files = 0;
747 first_table = ll_null (&mtf.files);
748 mtf.dict = dict_create ();
751 mtf.first = mtf.last = NULL;
752 case_nullify (&mtf.buffered_case);
753 case_nullify (&mtf.prev_BY_case);
756 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
758 lex_match (lexer, '/');
759 while (lex_token (lexer) == T_ID
760 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
761 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
763 struct mtf_file *file = xmalloc (sizeof *file);
768 file->in_name = NULL;
772 case_nullify (&file->input);
774 if (lex_match_id (lexer, "FILE"))
776 file->type = MTF_FILE;
777 ll_insert (first_table, &file->ll);
778 mtf.nonempty_files++;
780 else if (lex_match_id (lexer, "TABLE"))
782 file->type = MTF_TABLE;
783 ll_push_tail (&mtf.files, &file->ll);
784 if (first_table == ll_null (&mtf.files))
785 first_table = &file->ll;
789 lex_match (lexer, '=');
791 if (lex_match (lexer, '*'))
793 if (!proc_has_active_file (ds))
795 msg (SE, _("Cannot specify the active file since no active "
796 "file has been defined."));
800 if (proc_make_temporary_transformations_permanent (ds))
802 _("MATCH FILES may not be used after TEMPORARY when "
803 "the active file is an input source. "
804 "Temporary transformations will be made permanent."));
806 file->dict = dict_clone (dataset_dict (ds));
810 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
811 if (file->handle == NULL)
814 file->reader = any_reader_open (file->handle, &file->dict);
815 if (file->reader == NULL)
819 while (lex_match (lexer, '/'))
820 if (lex_match_id (lexer, "RENAME"))
822 if (!rename_variables (lexer, file->dict))
825 else if (lex_match_id (lexer, "IN"))
827 lex_match (lexer, '=');
828 if (lex_token (lexer) != T_ID)
830 lex_error (lexer, NULL);
834 if (file->in_name != NULL)
836 msg (SE, _("Multiple IN subcommands for a single FILE or "
840 file->in_name = xstrdup (lex_tokid (lexer));
845 mtf_merge_dictionary (mtf.dict, file);
848 while (lex_token (lexer) != '.')
850 if (lex_match (lexer, T_BY))
852 struct mtf_file *file;
853 struct variable **by;
858 lex_sbc_only_once ("BY");
862 lex_match (lexer, '=');
863 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
864 PV_NO_DUPLICATE | PV_NO_SCRATCH))
868 ll_for_each (file, struct mtf_file, ll, &mtf.files)
872 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
873 for (i = 0; i < mtf.by_cnt; i++)
875 const char *var_name = var_get_name (by[i]);
876 file->by[i] = dict_lookup_var (file->dict, var_name);
877 if (file->by[i] == NULL)
879 if (file->handle != NULL)
880 msg (SE, _("File %s lacks BY variable %s."),
881 fh_get_name (file->handle), var_name);
883 msg (SE, _("Active file lacks BY variable %s."),
894 else if (lex_match_id (lexer, "FIRST"))
896 if (first_name[0] != '\0')
898 lex_sbc_only_once ("FIRST");
902 lex_match (lexer, '=');
903 if (!lex_force_id (lexer))
905 strcpy (first_name, lex_tokid (lexer));
908 else if (lex_match_id (lexer, "LAST"))
910 if (last_name[0] != '\0')
912 lex_sbc_only_once ("LAST");
916 lex_match (lexer, '=');
917 if (!lex_force_id (lexer))
919 strcpy (last_name, lex_tokid (lexer));
922 else if (lex_match_id (lexer, "MAP"))
926 else if (lex_match_id (lexer, "DROP"))
928 if (!drop_variables (lexer, mtf.dict))
931 else if (lex_match_id (lexer, "KEEP"))
933 if (!keep_variables (lexer, mtf.dict))
938 lex_error (lexer, NULL);
942 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
944 lex_end_of_command (lexer);
951 if (first_table != ll_null (&mtf.files))
953 msg (SE, _("BY is required when TABLE is specified."));
958 msg (SE, _("BY is required when IN is specified."));
963 /* Set up mapping from each file's variables to master
965 ll_for_each (file, struct mtf_file, ll, &mtf.files)
967 size_t in_var_cnt = dict_get_var_cnt (file->dict);
969 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
971 for (i = 0; i < in_var_cnt; i++)
973 struct variable *in_var = dict_get_var (file->dict, i);
974 struct variable *out_var = dict_lookup_var (mtf.dict,
975 var_get_name (in_var));
979 struct mtf_variable *mv = &file->vars[file->var_cnt++];
981 mv->out_var = out_var;
986 /* Add IN, FIRST, and LAST variables to master dictionary. */
987 ll_for_each (file, struct mtf_file, ll, &mtf.files)
988 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
990 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
991 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
994 dict_delete_scratch_vars (mtf.dict);
995 dict_compact_values (mtf.dict);
996 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
997 taint = taint_clone (casewriter_get_taint (mtf.output));
999 ll_for_each (file, struct mtf_file, ll, &mtf.files)
1001 if (file->reader == NULL)
1003 if (active_file == NULL)
1005 proc_discard_output (ds);
1006 file->reader = active_file = proc_open (ds);
1009 file->reader = casereader_clone (active_file);
1011 taint_propagate (casereader_get_taint (file->reader), taint);
1014 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1015 mtf_read_record (&mtf, file);
1016 while (mtf.nonempty_files > 0)
1017 mtf_process_case (&mtf);
1018 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1020 if (mtf.last != NULL)
1021 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1022 casewriter_write (mtf.output, &mtf.buffered_case);
1023 case_nullify (&mtf.buffered_case);
1025 mtf_close_all_files (&mtf);
1026 if (active_file != NULL)
1029 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1035 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1038 if (active_file != NULL)
1041 taint_destroy (taint);
1042 return CMD_CASCADING_FAILURE;
1045 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1046 attempts to create a variable named VAR_NAME, with format
1047 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1048 Returns true if successful, false if the variable name is a
1049 duplicate (in which case a message saying that the variable
1050 specified on the given SUBCOMMAND is a duplicate is emitted).
1051 Also returns true, without doing anything, if VAR_NAME is null
1054 create_flag_var (const char *subcommand, const char *var_name,
1055 struct dictionary *dict, struct variable **var)
1057 if (var_name != NULL && var_name[0] != '\0')
1059 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1060 *var = dict_create_var (dict, var_name, 0);
1063 msg (SE, _("Variable name %s specified on %s subcommand "
1064 "duplicates an existing variable name."),
1065 subcommand, var_name);
1068 var_set_both_formats (*var, &format);
1075 /* Return a string in an allocated buffer describing V's variable
1078 var_type_description (struct variable *v)
1080 if (var_is_numeric (v))
1081 return xstrdup ("numeric");
1083 return xasprintf ("string with width %d", var_get_width (v));
1086 /* Closes all the files in MTF and frees their associated data.
1087 Returns true if successful, false if an I/O error occurred on
1088 any of the files. */
1090 mtf_close_all_files (struct mtf_proc *mtf)
1092 struct mtf_file *file;
1095 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1097 fh_unref (file->handle);
1098 casereader_destroy (file->reader);
1100 dict_destroy (file->dict);
1101 free (file->in_name);
1102 case_destroy (&file->input);
1110 /* Frees all the data for the MATCH FILES procedure. */
1112 mtf_free (struct mtf_proc *mtf)
1114 mtf_close_all_files (mtf);
1115 dict_destroy (mtf->dict);
1116 casewriter_destroy (mtf->output);
1117 case_destroy (&mtf->buffered_case);
1118 case_destroy (&mtf->prev_BY_case);
1121 /* Reads the next record into FILE, if possible, and update MTF's
1122 nonempty_files count if not. */
1124 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
1126 case_destroy (&file->input);
1127 if (!casereader_read (file->reader, &file->input))
1129 mtf->nonempty_files--;
1136 /* Compare the BY variables for files A and B; return -1 if A <
1137 B, 0 if A == B, 1 if A > B. (If there are no BY variables,
1138 then all records are equal.) */
1140 mtf_compare_BY_values (struct mtf_proc *mtf,
1141 struct mtf_file *a, struct mtf_file *b)
1143 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1146 /* Processes input files and write one case to the output file. */
1148 mtf_process_case (struct mtf_proc *mtf)
1151 struct mtf_file *min;
1152 struct mtf_file *file;
1156 /* Find the set of one or more FILEs whose BY values are
1157 minimal, as well as the set of zero or more TABLEs whose BY
1158 values equal those of the minimum FILEs.
1160 After each iteration of the loop, this invariant holds: the
1161 FILEs with minimum BY values thus far have "sequence"
1162 members equal to min_sequence, and "min" points to one of
1163 the mtf_files whose case has those minimum BY values, and
1164 similarly for TABLEs. */
1167 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1168 if (case_is_null (&file->input))
1169 file->sequence = -1;
1170 else if (file->type == MTF_FILE)
1172 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1174 file->sequence = cmp < 0 ? -1 : min_sequence;
1177 file->sequence = ++min_sequence;
1184 assert (min != NULL);
1187 cmp = mtf_compare_BY_values (mtf, min, file);
1189 while (cmp > 0 && mtf_read_record (mtf, file));
1190 file->sequence = cmp == 0 ? min_sequence : -1;
1193 /* Form the output case from the input cases. */
1194 case_create (&c, dict_get_next_value_idx (mtf->dict));
1195 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1197 struct variable *v = dict_get_var (mtf->dict, i);
1198 value_set_missing (case_data_rw (&c, v), var_get_width (v));
1200 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1202 bool include_file = file->sequence == min_sequence;
1204 for (i = 0; i < file->var_cnt; i++)
1206 const struct mtf_variable *mv = &file->vars[i];
1207 const union value *in = case_data (&file->input, mv->in_var);
1208 union value *out = case_data_rw (&c, mv->out_var);
1209 value_copy (out, in, var_get_width (mv->in_var));
1211 if (file->in_var != NULL)
1212 case_data_rw (&c, file->in_var)->f = include_file;
1215 /* Write the output case. */
1216 if (mtf->first == NULL && mtf->last == NULL)
1218 /* With no FIRST or LAST variables, it's trivial. */
1219 casewriter_write (mtf->output, &c);
1223 /* It's harder with LAST, because we can't know whether
1224 this case is the last in a group until we've prepared
1225 the *next* case also. Thus, we buffer the previous
1226 output case until the next one is ready.
1228 We also have to save a copy of one of the previous input
1229 cases, so that we can compare the BY variables. We
1230 can't compare the BY variables between the current
1231 output case and the saved one because the BY variables
1232 might not be in the output (the user is allowed to drop
1235 if (mtf->prev_BY != NULL)
1237 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1238 min->by, mtf->prev_BY,
1240 if (mtf->last != NULL)
1241 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1242 casewriter_write (mtf->output, &mtf->buffered_case);
1247 case_move (&mtf->buffered_case, &c);
1248 if (mtf->first != NULL)
1249 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1253 mtf->prev_BY = min->by;
1254 case_destroy (&mtf->prev_BY_case);
1255 case_clone (&mtf->prev_BY_case, &min->input);
1259 /* Read another record from each input file FILE with minimum
1261 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1262 if (file->type == MTF_FILE)
1264 if (file->sequence == min_sequence)
1265 mtf_read_record (mtf, file);
1271 /* Merge the dictionary for file F into master dictionary M. */
1273 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1275 struct dictionary *d = f->dict;
1276 const char *d_docs, *m_docs;
1279 if (dict_get_label (m) == NULL)
1280 dict_set_label (m, dict_get_label (d));
1282 d_docs = dict_get_documents (d);
1283 m_docs = dict_get_documents (m);
1287 dict_set_documents (m, d_docs);
1290 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1291 dict_set_documents (m, new_docs);
1296 for (i = 0; i < dict_get_var_cnt (d); i++)
1298 struct variable *dv = dict_get_var (d, i);
1299 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1301 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1306 if (var_get_width (mv) != var_get_width (dv))
1308 char *dv_description = var_type_description (dv);
1309 char *mv_description = var_type_description (mv);
1310 msg (SE, _("Variable %s in file %s (%s) has different "
1311 "type or width from the same variable in "
1312 "earlier file (%s)."),
1313 var_get_name (dv), fh_get_name (f->handle),
1314 dv_description, mv_description);
1315 free (dv_description);
1316 free (mv_description);
1320 if (var_get_width (dv) == var_get_width (mv))
1322 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1323 var_set_value_labels (mv, var_get_value_labels (dv));
1324 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1325 var_set_missing_values (mv, var_get_missing_values (dv));
1328 if (var_get_label (dv) && !var_get_label (mv))
1329 var_set_label (mv, var_get_label (dv));
1332 mv = dict_clone_var_assert (m, dv, var_get_name (dv));