1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/case-map.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/format.h>
28 #include <data/dictionary.h>
29 #include <data/por-file-writer.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/sys-file-writer.h>
33 #include <data/transformations.h>
34 #include <data/value-labels.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <language/lexer/variable-parser.h>
40 #include <libpspp/assertion.h>
41 #include <libpspp/compiler.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/message.h>
44 #include <libpspp/misc.h>
45 #include <libpspp/str.h>
46 #include <libpspp/taint.h>
51 #define _(msgid) gettext (msgid)
53 static bool parse_dict_trim (struct lexer *, struct dictionary *);
55 /* Reading system and portable files. */
57 /* Type of command. */
64 static void get_translate_case (struct ccase *, struct ccase *, void *map_);
65 static bool get_destroy_case_map (void *map_);
67 /* Parses a GET or IMPORT command. */
69 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
71 struct casereader *reader = NULL;
72 struct file_handle *fh = NULL;
73 struct dictionary *dict = NULL;
74 struct case_map *map = NULL;
78 lex_match (lexer, '/');
80 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
82 lex_match (lexer, '=');
84 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
88 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
90 lex_match (lexer, '=');
92 if (lex_match_id (lexer, "COMM"))
94 else if (lex_match_id (lexer, "TAPE"))
98 lex_error (lexer, _("expecting COMM or TAPE"));
108 lex_sbc_missing (lexer, "FILE");
112 reader = any_reader_open (fh, &dict);
116 case_map_prepare_dict (dict);
118 while (lex_token (lexer) != '.')
120 lex_match (lexer, '/');
121 if (!parse_dict_trim (lexer, dict))
124 dict_compact_values (dict);
126 map = case_map_from_dict (dict);
128 reader = casereader_create_translator (reader,
129 dict_get_next_value_idx (dict),
131 get_destroy_case_map,
134 proc_set_active_file (ds, reader, dict);
139 casereader_destroy (reader);
142 return CMD_CASCADING_FAILURE;
146 get_translate_case (struct ccase *input, struct ccase *output,
149 struct case_map *map = map_;
150 case_map_execute (map, input, output);
151 case_destroy (input);
155 get_destroy_case_map (void *map_)
157 struct case_map *map = map_;
158 case_map_destroy (map);
164 cmd_get (struct lexer *lexer, struct dataset *ds)
166 return parse_read_command (lexer, ds, GET_CMD);
171 cmd_import (struct lexer *lexer, struct dataset *ds)
173 return parse_read_command (lexer, ds, IMPORT_CMD);
176 /* Writing system and portable files. */
178 /* Type of output file. */
181 SYSFILE_WRITER, /* System file. */
182 PORFILE_WRITER /* Portable file. */
185 /* Type of a command. */
188 XFORM_CMD, /* Transformation. */
189 PROC_CMD /* Procedure. */
192 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
193 WRITER_TYPE identifies the type of file to write,
194 and COMMAND_TYPE identifies the type of command.
196 On success, returns a writer.
197 For procedures only, sets *RETAIN_UNSELECTED to true if cases
198 that would otherwise be excluded by FILTER or USE should be
201 On failure, returns a null pointer. */
202 static struct casewriter *
203 parse_write_command (struct lexer *lexer, struct dataset *ds,
204 enum writer_type writer_type,
205 enum command_type command_type,
206 bool *retain_unselected)
209 struct file_handle *handle; /* Output file. */
210 struct dictionary *dict; /* Dictionary for output file. */
211 struct casewriter *writer; /* Writer. */
212 struct case_map *map; /* Map from input data to data for writer. */
214 /* Common options. */
215 bool print_map; /* Print map? TODO. */
216 bool print_short_names; /* Print long-to-short name map. TODO. */
217 struct sfm_write_options sysfile_opts;
218 struct pfm_write_options porfile_opts;
220 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
221 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
222 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
224 if (command_type == PROC_CMD)
225 *retain_unselected = true;
228 dict = dict_clone (dataset_dict (ds));
232 print_short_names = false;
233 sysfile_opts = sfm_writer_default_options ();
234 porfile_opts = pfm_writer_default_options ();
236 case_map_prepare_dict (dict);
237 dict_delete_scratch_vars (dict);
239 lex_match (lexer, '/');
242 if (lex_match_id (lexer, "OUTFILE"))
246 lex_sbc_only_once ("OUTFILE");
250 lex_match (lexer, '=');
252 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
256 else if (lex_match_id (lexer, "NAMES"))
257 print_short_names = true;
258 else if (lex_match_id (lexer, "PERMISSIONS"))
262 lex_match (lexer, '=');
263 if (lex_match_id (lexer, "READONLY"))
265 else if (lex_match_id (lexer, "WRITEABLE"))
269 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
272 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
274 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
276 lex_match (lexer, '=');
277 if (lex_match_id (lexer, "RETAIN"))
278 *retain_unselected = true;
279 else if (lex_match_id (lexer, "DELETE"))
280 *retain_unselected = false;
283 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
287 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
288 sysfile_opts.compress = true;
289 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
290 sysfile_opts.compress = false;
291 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
293 lex_match (lexer, '=');
294 if (!lex_force_int (lexer))
296 sysfile_opts.version = lex_integer (lexer);
299 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
301 lex_match (lexer, '=');
302 if (lex_match_id (lexer, "COMMUNICATIONS"))
303 porfile_opts.type = PFM_COMM;
304 else if (lex_match_id (lexer, "TAPE"))
305 porfile_opts.type = PFM_TAPE;
308 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
312 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
314 lex_match (lexer, '=');
315 if (!lex_force_int (lexer))
317 porfile_opts.digits = lex_integer (lexer);
320 else if (!parse_dict_trim (lexer, dict))
323 if (!lex_match (lexer, '/'))
326 if (lex_end_of_command (lexer) != CMD_SUCCESS)
331 lex_sbc_missing (lexer, "OUTFILE");
335 dict_delete_scratch_vars (dict);
336 dict_compact_values (dict);
338 if (fh_get_referent (handle) == FH_REF_FILE)
343 writer = sfm_open_writer (handle, dict, sysfile_opts);
346 writer = pfm_open_writer (handle, dict, porfile_opts);
351 writer = any_writer_open (handle, dict);
355 map = case_map_from_dict (dict);
357 writer = casewriter_create_translator (writer,
358 case_map_get_value_cnt (map),
360 get_destroy_case_map,
367 casewriter_destroy (writer);
369 case_map_destroy (map);
373 /* SAVE and EXPORT. */
375 /* Parses and performs the SAVE or EXPORT procedure. */
377 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
379 bool retain_unselected;
380 struct variable *saved_filter_variable;
381 struct casewriter *output;
384 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
387 return CMD_CASCADING_FAILURE;
389 saved_filter_variable = dict_get_filter (dataset_dict (ds));
390 if (retain_unselected)
391 dict_set_filter (dataset_dict (ds), NULL);
393 casereader_transfer (proc_open (ds), output);
394 ok = casewriter_destroy (output);
395 ok = proc_commit (ds) && ok;
397 dict_set_filter (dataset_dict (ds), saved_filter_variable);
399 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
403 cmd_save (struct lexer *lexer, struct dataset *ds)
405 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
409 cmd_export (struct lexer *lexer, struct dataset *ds)
411 return parse_output_proc (lexer, ds, PORFILE_WRITER);
414 /* XSAVE and XEXPORT. */
416 /* Transformation. */
419 struct casewriter *writer; /* Writer. */
422 static trns_proc_func output_trns_proc;
423 static trns_free_func output_trns_free;
425 /* Parses the XSAVE or XEXPORT transformation command. */
427 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
429 struct output_trns *t = xmalloc (sizeof *t);
430 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
431 if (t->writer == NULL)
434 return CMD_CASCADING_FAILURE;
437 add_transformation (ds, output_trns_proc, output_trns_free, t);
441 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
443 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
445 struct output_trns *t = trns_;
447 case_clone (&tmp, c);
448 casewriter_write (t->writer, &tmp);
449 return TRNS_CONTINUE;
452 /* Frees an XSAVE or XEXPORT transformation.
453 Returns true if successful, false if an I/O error occurred. */
455 output_trns_free (void *trns_)
457 struct output_trns *t = trns_;
458 bool ok = casewriter_destroy (t->writer);
465 cmd_xsave (struct lexer *lexer, struct dataset *ds)
467 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
470 /* XEXPORT command. */
472 cmd_xexport (struct lexer *lexer, struct dataset *ds)
474 return parse_output_trns (lexer, ds, PORFILE_WRITER);
477 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
478 static bool drop_variables (struct lexer *, struct dictionary *dict);
479 static bool keep_variables (struct lexer *, struct dictionary *dict);
481 /* Commands that read and write system files share a great deal
482 of common syntactic structure for rearranging and dropping
483 variables. This function parses this syntax and modifies DICT
484 appropriately. Returns true on success, false on failure. */
486 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
488 if (lex_match_id (lexer, "MAP"))
493 else if (lex_match_id (lexer, "DROP"))
494 return drop_variables (lexer, dict);
495 else if (lex_match_id (lexer, "KEEP"))
496 return keep_variables (lexer, dict);
497 else if (lex_match_id (lexer, "RENAME"))
498 return rename_variables (lexer, dict);
501 lex_error (lexer, _("expecting a valid subcommand"));
506 /* Parses and performs the RENAME subcommand of GET and SAVE. */
508 rename_variables (struct lexer *lexer, struct dictionary *dict)
521 lex_match (lexer, '=');
522 if (lex_token (lexer) != '(')
526 v = parse_variable (lexer, dict);
529 if (!lex_force_match (lexer, '=')
530 || !lex_force_id (lexer))
532 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
534 msg (SE, _("Cannot rename %s as %s because there already exists "
535 "a variable named %s. To rename variables with "
536 "overlapping names, use a single RENAME subcommand "
537 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
538 "\"/RENAME (A B C=B C A)\"."),
539 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
543 dict_rename_var (dict, v, lex_tokid (lexer));
552 while (lex_match (lexer, '('))
556 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
558 if (!lex_match (lexer, '='))
560 msg (SE, _("`=' expected after variable list."));
563 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
567 msg (SE, _("Number of variables on left side of `=' (%d) does not "
568 "match number of variables on right side (%d), in "
569 "parenthesized group %d of RENAME subcommand."),
570 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
573 if (!lex_force_match (lexer, ')'))
578 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
580 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
586 for (i = 0; i < nn; i++)
594 /* Parses and performs the DROP subcommand of GET and SAVE.
595 Returns true if successful, false on failure.*/
597 drop_variables (struct lexer *lexer, struct dictionary *dict)
602 lex_match (lexer, '=');
603 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
605 dict_delete_vars (dict, v, nv);
608 if (dict_get_var_cnt (dict) == 0)
610 msg (SE, _("Cannot DROP all variables from dictionary."));
616 /* Parses and performs the KEEP subcommand of GET and SAVE.
617 Returns true if successful, false on failure.*/
619 keep_variables (struct lexer *lexer, struct dictionary *dict)
625 lex_match (lexer, '=');
626 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
629 /* Move the specified variables to the beginning. */
630 dict_reorder_vars (dict, v, nv);
632 /* Delete the remaining variables. */
633 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
634 for (i = nv; i < dict_get_var_cnt (dict); i++)
635 v[i - nv] = dict_get_var (dict, i);
636 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
647 MTF_FILE, /* Specified on FILE= subcommand. */
648 MTF_TABLE /* Specified on TABLE= subcommand. */
651 /* One of the FILEs or TABLEs on MATCH FILES. */
654 struct ll ll; /* In list of all files and tables. */
659 const struct variable **by; /* List of BY variables for this file. */
660 struct mtf_variable *vars; /* Variables to copy to output. */
661 size_t var_cnt; /* Number of other variables. */
663 struct file_handle *handle; /* Input file handle. */
664 struct dictionary *dict; /* Input file dictionary. */
665 struct casereader *reader; /* Input reader. */
666 struct ccase input; /* Input record (null at end of file). */
669 char *in_name; /* Variable name. */
670 struct variable *in_var; /* Variable (in master dictionary). */
675 struct variable *in_var;
676 struct variable *out_var;
679 /* MATCH FILES procedure. */
682 struct ll_list files; /* List of "struct mtf_file"s. */
683 int nonempty_files; /* FILEs that are not at end-of-file. */
685 bool ok; /* False if I/O error occurs. */
687 struct dictionary *dict; /* Dictionary of output file. */
688 struct casewriter *output; /* MATCH FILES output. */
690 size_t by_cnt; /* Number of variables on BY subcommand. */
693 Only if "first" or "last" is nonnull are the remaining
695 struct variable *first; /* Variable specified on FIRST (if any). */
696 struct variable *last; /* Variable specified on LAST (if any). */
697 struct ccase buffered_case; /* Case ready for output except that we don't
698 know the value for the LAST variable yet. */
699 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
700 const struct variable **prev_BY; /* Last set of BY variables. */
703 static void mtf_free (struct mtf_proc *);
705 static bool mtf_close_all_files (struct mtf_proc *);
706 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
707 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
709 static void mtf_process_case (struct mtf_proc *);
711 static bool create_flag_var (const char *subcommand_name, const char *var_name,
712 struct dictionary *, struct variable **);
713 static char *var_type_description (struct variable *);
715 /* Parse and execute the MATCH FILES command. */
717 cmd_match_files (struct lexer *lexer, struct dataset *ds)
720 struct ll *first_table;
721 struct mtf_file *file, *next;
724 struct casereader *active_file = NULL;
726 char first_name[LONG_NAME_LEN + 1] = "";
727 char last_name[LONG_NAME_LEN + 1] = "";
729 struct taint *taint = NULL;
733 ll_init (&mtf.files);
734 mtf.nonempty_files = 0;
735 first_table = ll_null (&mtf.files);
736 mtf.dict = dict_create ();
739 mtf.first = mtf.last = NULL;
740 case_nullify (&mtf.buffered_case);
741 case_nullify (&mtf.prev_BY_case);
744 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
746 lex_match (lexer, '/');
747 while (lex_token (lexer) == T_ID
748 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
749 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
751 struct mtf_file *file = xmalloc (sizeof *file);
756 file->in_name = NULL;
760 case_nullify (&file->input);
762 if (lex_match_id (lexer, "FILE"))
764 file->type = MTF_FILE;
765 ll_insert (first_table, &file->ll);
766 mtf.nonempty_files++;
768 else if (lex_match_id (lexer, "TABLE"))
770 file->type = MTF_TABLE;
771 ll_push_tail (&mtf.files, &file->ll);
772 if (first_table == ll_null (&mtf.files))
773 first_table = &file->ll;
777 lex_match (lexer, '=');
779 if (lex_match (lexer, '*'))
781 if (!proc_has_active_file (ds))
783 msg (SE, _("Cannot specify the active file since no active "
784 "file has been defined."));
788 if (proc_make_temporary_transformations_permanent (ds))
790 _("MATCH FILES may not be used after TEMPORARY when "
791 "the active file is an input source. "
792 "Temporary transformations will be made permanent."));
794 file->dict = dict_clone (dataset_dict (ds));
798 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
799 if (file->handle == NULL)
802 file->reader = any_reader_open (file->handle, &file->dict);
803 if (file->reader == NULL)
807 while (lex_match (lexer, '/'))
808 if (lex_match_id (lexer, "RENAME"))
810 if (!rename_variables (lexer, file->dict))
813 else if (lex_match_id (lexer, "IN"))
815 lex_match (lexer, '=');
816 if (lex_token (lexer) != T_ID)
818 lex_error (lexer, NULL);
822 if (file->in_name != NULL)
824 msg (SE, _("Multiple IN subcommands for a single FILE or "
828 file->in_name = xstrdup (lex_tokid (lexer));
833 mtf_merge_dictionary (mtf.dict, file);
836 while (lex_token (lexer) != '.')
838 if (lex_match (lexer, T_BY))
840 struct mtf_file *file;
841 struct variable **by;
846 lex_sbc_only_once ("BY");
850 lex_match (lexer, '=');
851 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
852 PV_NO_DUPLICATE | PV_NO_SCRATCH))
856 ll_for_each (file, struct mtf_file, ll, &mtf.files)
860 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
861 for (i = 0; i < mtf.by_cnt; i++)
863 const char *var_name = var_get_name (by[i]);
864 file->by[i] = dict_lookup_var (file->dict, var_name);
865 if (file->by[i] == NULL)
867 if (file->handle != NULL)
868 msg (SE, _("File %s lacks BY variable %s."),
869 fh_get_name (file->handle), var_name);
871 msg (SE, _("Active file lacks BY variable %s."),
882 else if (lex_match_id (lexer, "FIRST"))
884 if (first_name[0] != '\0')
886 lex_sbc_only_once ("FIRST");
890 lex_match (lexer, '=');
891 if (!lex_force_id (lexer))
893 strcpy (first_name, lex_tokid (lexer));
896 else if (lex_match_id (lexer, "LAST"))
898 if (last_name[0] != '\0')
900 lex_sbc_only_once ("LAST");
904 lex_match (lexer, '=');
905 if (!lex_force_id (lexer))
907 strcpy (last_name, lex_tokid (lexer));
910 else if (lex_match_id (lexer, "MAP"))
914 else if (lex_match_id (lexer, "DROP"))
916 if (!drop_variables (lexer, mtf.dict))
919 else if (lex_match_id (lexer, "KEEP"))
921 if (!keep_variables (lexer, mtf.dict))
926 lex_error (lexer, NULL);
930 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
932 lex_end_of_command (lexer);
939 if (first_table != ll_null (&mtf.files))
941 msg (SE, _("BY is required when TABLE is specified."));
946 msg (SE, _("BY is required when IN is specified."));
951 /* Set up mapping from each file's variables to master
953 ll_for_each (file, struct mtf_file, ll, &mtf.files)
955 size_t in_var_cnt = dict_get_var_cnt (file->dict);
957 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
959 for (i = 0; i < in_var_cnt; i++)
961 struct variable *in_var = dict_get_var (file->dict, i);
962 struct variable *out_var = dict_lookup_var (mtf.dict,
963 var_get_name (in_var));
967 struct mtf_variable *mv = &file->vars[file->var_cnt++];
969 mv->out_var = out_var;
974 /* Add IN, FIRST, and LAST variables to master dictionary. */
975 ll_for_each (file, struct mtf_file, ll, &mtf.files)
976 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
978 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
979 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
982 dict_delete_scratch_vars (mtf.dict);
983 dict_compact_values (mtf.dict);
984 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
985 taint = taint_clone (casewriter_get_taint (mtf.output));
987 ll_for_each (file, struct mtf_file, ll, &mtf.files)
989 if (file->reader == NULL)
991 if (active_file == NULL)
993 proc_discard_output (ds);
994 file->reader = active_file = proc_open (ds);
997 file->reader = casereader_clone (active_file);
999 taint_propagate (casereader_get_taint (file->reader), taint);
1002 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1003 mtf_read_record (&mtf, file);
1004 while (mtf.nonempty_files > 0)
1005 mtf_process_case (&mtf);
1006 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1008 if (mtf.last != NULL)
1009 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1010 casewriter_write (mtf.output, &mtf.buffered_case);
1011 case_nullify (&mtf.buffered_case);
1013 mtf_close_all_files (&mtf);
1014 if (active_file != NULL)
1017 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1023 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1026 if (active_file != NULL)
1029 taint_destroy (taint);
1030 return CMD_CASCADING_FAILURE;
1033 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1034 attempts to create a variable named VAR_NAME, with format
1035 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1036 Returns true if successful, false if the variable name is a
1037 duplicate (in which case a message saying that the variable
1038 specified on the given SUBCOMMAND is a duplicate is emitted).
1039 Also returns true, without doing anything, if VAR_NAME is null
1042 create_flag_var (const char *subcommand, const char *var_name,
1043 struct dictionary *dict, struct variable **var)
1045 if (var_name != NULL && var_name[0] != '\0')
1047 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1048 *var = dict_create_var (dict, var_name, 0);
1051 msg (SE, _("Variable name %s specified on %s subcommand "
1052 "duplicates an existing variable name."),
1053 subcommand, var_name);
1056 var_set_both_formats (*var, &format);
1063 /* Return a string in an allocated buffer describing V's variable
1066 var_type_description (struct variable *v)
1068 if (var_is_numeric (v))
1069 return xstrdup ("numeric");
1071 return xasprintf ("string with width %d", var_get_width (v));
1074 /* Closes all the files in MTF and frees their associated data.
1075 Returns true if successful, false if an I/O error occurred on
1076 any of the files. */
1078 mtf_close_all_files (struct mtf_proc *mtf)
1080 struct mtf_file *file;
1083 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1085 casereader_destroy (file->reader);
1087 dict_destroy (file->dict);
1088 free (file->in_name);
1089 case_destroy (&file->input);
1097 /* Frees all the data for the MATCH FILES procedure. */
1099 mtf_free (struct mtf_proc *mtf)
1101 mtf_close_all_files (mtf);
1102 dict_destroy (mtf->dict);
1103 casewriter_destroy (mtf->output);
1104 case_destroy (&mtf->buffered_case);
1105 case_destroy (&mtf->prev_BY_case);
1108 /* Reads the next record into FILE, if possible, and update MTF's
1109 nonempty_files count if not. */
1111 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
1113 case_destroy (&file->input);
1114 if (!casereader_read (file->reader, &file->input))
1116 mtf->nonempty_files--;
1123 /* Compare the BY variables for files A and B; return -1 if A <
1124 B, 0 if A == B, 1 if A > B. (If there are no BY variables,
1125 then all records are equal.) */
1127 mtf_compare_BY_values (struct mtf_proc *mtf,
1128 struct mtf_file *a, struct mtf_file *b)
1130 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1133 /* Processes input files and write one case to the output file. */
1135 mtf_process_case (struct mtf_proc *mtf)
1138 struct mtf_file *min;
1139 struct mtf_file *file;
1143 /* Find the set of one or more FILEs whose BY values are
1144 minimal, as well as the set of zero or more TABLEs whose BY
1145 values equal those of the minimum FILEs.
1147 After each iteration of the loop, this invariant holds: the
1148 FILEs with minimum BY values thus far have "sequence"
1149 members equal to min_sequence, and "min" points to one of
1150 the mtf_files whose case has those minimum BY values, and
1151 similarly for TABLEs. */
1154 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1155 if (case_is_null (&file->input))
1156 file->sequence = -1;
1157 else if (file->type == MTF_FILE)
1159 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1161 file->sequence = cmp < 0 ? -1 : min_sequence;
1164 file->sequence = ++min_sequence;
1171 assert (min != NULL);
1174 cmp = mtf_compare_BY_values (mtf, min, file);
1176 while (cmp > 0 && mtf_read_record (mtf, file));
1177 file->sequence = cmp == 0 ? min_sequence : -1;
1180 /* Form the output case from the input cases. */
1181 case_create (&c, dict_get_next_value_idx (mtf->dict));
1182 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1184 struct variable *v = dict_get_var (mtf->dict, i);
1185 value_set_missing (case_data_rw (&c, v), var_get_width (v));
1187 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1189 bool include_file = file->sequence == min_sequence;
1191 for (i = 0; i < file->var_cnt; i++)
1193 const struct mtf_variable *mv = &file->vars[i];
1194 const union value *in = case_data (&file->input, mv->in_var);
1195 union value *out = case_data_rw (&c, mv->out_var);
1196 value_copy (out, in, var_get_width (mv->in_var));
1198 if (file->in_var != NULL)
1199 case_data_rw (&c, file->in_var)->f = include_file;
1202 /* Write the output case. */
1203 if (mtf->first == NULL && mtf->last == NULL)
1205 /* With no FIRST or LAST variables, it's trivial. */
1206 casewriter_write (mtf->output, &c);
1210 /* It's harder with LAST, because we can't know whether
1211 this case is the last in a group until we've prepared
1212 the *next* case also. Thus, we buffer the previous
1213 output case until the next one is ready.
1215 We also have to save a copy of one of the previous input
1216 cases, so that we can compare the BY variables. We
1217 can't compare the BY variables between the current
1218 output case and the saved one because the BY variables
1219 might not be in the output (the user is allowed to drop
1222 if (mtf->prev_BY != NULL)
1224 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1225 min->by, mtf->prev_BY,
1227 if (mtf->last != NULL)
1228 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1229 casewriter_write (mtf->output, &mtf->buffered_case);
1234 case_move (&mtf->buffered_case, &c);
1235 if (mtf->first != NULL)
1236 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1240 mtf->prev_BY = min->by;
1241 case_destroy (&mtf->prev_BY_case);
1242 case_clone (&mtf->prev_BY_case, &min->input);
1246 /* Read another record from each input file FILE with minimum
1248 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1249 if (file->type == MTF_FILE)
1251 if (file->sequence == min_sequence)
1252 mtf_read_record (mtf, file);
1258 /* Merge the dictionary for file F into master dictionary M. */
1260 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1262 struct dictionary *d = f->dict;
1263 const char *d_docs, *m_docs;
1266 if (dict_get_label (m) == NULL)
1267 dict_set_label (m, dict_get_label (d));
1269 d_docs = dict_get_documents (d);
1270 m_docs = dict_get_documents (m);
1274 dict_set_documents (m, d_docs);
1277 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1278 dict_set_documents (m, new_docs);
1283 for (i = 0; i < dict_get_var_cnt (d); i++)
1285 struct variable *dv = dict_get_var (d, i);
1286 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1288 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1293 if (var_get_width (mv) != var_get_width (dv))
1295 char *dv_description = var_type_description (dv);
1296 char *mv_description = var_type_description (mv);
1297 msg (SE, _("Variable %s in file %s (%s) has different "
1298 "type or width from the same variable in "
1299 "earlier file (%s)."),
1300 var_get_name (dv), fh_get_name (f->handle),
1301 dv_description, mv_description);
1302 free (dv_description);
1303 free (mv_description);
1307 if (var_get_width (dv) == var_get_width (mv))
1309 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1310 var_set_value_labels (mv, var_get_value_labels (dv));
1311 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1312 var_set_missing_values (mv, var_get_missing_values (dv));
1315 if (var_get_label (dv) && !var_get_label (mv))
1316 var_set_label (mv, var_get_label (dv));
1319 mv = dict_clone_var_assert (m, dv, var_get_name (dv));