1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/case-map.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/format.h>
28 #include <data/dictionary.h>
29 #include <data/por-file-writer.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/sys-file-writer.h>
33 #include <data/transformations.h>
34 #include <data/value-labels.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <language/lexer/variable-parser.h>
40 #include <libpspp/assertion.h>
41 #include <libpspp/compiler.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/message.h>
44 #include <libpspp/misc.h>
45 #include <libpspp/str.h>
46 #include <libpspp/taint.h>
53 #define _(msgid) gettext (msgid)
55 static bool parse_dict_trim (struct lexer *, struct dictionary *);
57 /* Reading system and portable files. */
59 /* Type of command. */
66 static void get_translate_case (struct ccase *, struct ccase *, void *map_);
67 static bool get_destroy_case_map (void *map_);
69 /* Parses a GET or IMPORT command. */
71 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
73 struct casereader *reader = NULL;
74 struct file_handle *fh = NULL;
75 struct dictionary *dict = NULL;
76 struct case_map *map = NULL;
78 if ( type == GET_CMD && lex_match_id (lexer, "DATA") )
80 return parse_get_data_command (lexer, ds);
85 lex_match (lexer, '/');
87 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
89 lex_match (lexer, '=');
91 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
95 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
97 lex_match (lexer, '=');
99 if (lex_match_id (lexer, "COMM"))
101 else if (lex_match_id (lexer, "TAPE"))
105 lex_error (lexer, _("expecting COMM or TAPE"));
115 lex_sbc_missing (lexer, "FILE");
119 reader = any_reader_open (fh, &dict);
123 case_map_prepare_dict (dict);
125 while (lex_token (lexer) != '.')
127 lex_match (lexer, '/');
128 if (!parse_dict_trim (lexer, dict))
131 dict_compact_values (dict);
133 map = case_map_from_dict (dict);
135 reader = casereader_create_translator (reader,
136 dict_get_next_value_idx (dict),
138 get_destroy_case_map,
141 proc_set_active_file (ds, reader, dict);
146 casereader_destroy (reader);
149 return CMD_CASCADING_FAILURE;
153 get_translate_case (struct ccase *input, struct ccase *output,
156 struct case_map *map = map_;
157 case_map_execute (map, input, output);
158 case_destroy (input);
162 get_destroy_case_map (void *map_)
164 struct case_map *map = map_;
165 case_map_destroy (map);
171 cmd_get (struct lexer *lexer, struct dataset *ds)
173 return parse_read_command (lexer, ds, GET_CMD);
178 cmd_import (struct lexer *lexer, struct dataset *ds)
180 return parse_read_command (lexer, ds, IMPORT_CMD);
183 /* Writing system and portable files. */
185 /* Type of output file. */
188 SYSFILE_WRITER, /* System file. */
189 PORFILE_WRITER /* Portable file. */
192 /* Type of a command. */
195 XFORM_CMD, /* Transformation. */
196 PROC_CMD /* Procedure. */
199 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
200 WRITER_TYPE identifies the type of file to write,
201 and COMMAND_TYPE identifies the type of command.
203 On success, returns a writer.
204 For procedures only, sets *RETAIN_UNSELECTED to true if cases
205 that would otherwise be excluded by FILTER or USE should be
208 On failure, returns a null pointer. */
209 static struct casewriter *
210 parse_write_command (struct lexer *lexer, struct dataset *ds,
211 enum writer_type writer_type,
212 enum command_type command_type,
213 bool *retain_unselected)
216 struct file_handle *handle; /* Output file. */
217 struct dictionary *dict; /* Dictionary for output file. */
218 struct casewriter *writer; /* Writer. */
219 struct case_map *map; /* Map from input data to data for writer. */
221 /* Common options. */
222 bool print_map; /* Print map? TODO. */
223 bool print_short_names; /* Print long-to-short name map. TODO. */
224 struct sfm_write_options sysfile_opts;
225 struct pfm_write_options porfile_opts;
227 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
228 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
229 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
231 if (command_type == PROC_CMD)
232 *retain_unselected = true;
235 dict = dict_clone (dataset_dict (ds));
239 print_short_names = false;
240 sysfile_opts = sfm_writer_default_options ();
241 porfile_opts = pfm_writer_default_options ();
243 case_map_prepare_dict (dict);
244 dict_delete_scratch_vars (dict);
246 lex_match (lexer, '/');
249 if (lex_match_id (lexer, "OUTFILE"))
253 lex_sbc_only_once ("OUTFILE");
257 lex_match (lexer, '=');
259 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
263 else if (lex_match_id (lexer, "NAMES"))
264 print_short_names = true;
265 else if (lex_match_id (lexer, "PERMISSIONS"))
269 lex_match (lexer, '=');
270 if (lex_match_id (lexer, "READONLY"))
272 else if (lex_match_id (lexer, "WRITEABLE"))
276 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
279 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
281 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
283 lex_match (lexer, '=');
284 if (lex_match_id (lexer, "RETAIN"))
285 *retain_unselected = true;
286 else if (lex_match_id (lexer, "DELETE"))
287 *retain_unselected = false;
290 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
294 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
295 sysfile_opts.compress = true;
296 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
297 sysfile_opts.compress = false;
298 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
300 lex_match (lexer, '=');
301 if (!lex_force_int (lexer))
303 sysfile_opts.version = lex_integer (lexer);
306 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
308 lex_match (lexer, '=');
309 if (lex_match_id (lexer, "COMMUNICATIONS"))
310 porfile_opts.type = PFM_COMM;
311 else if (lex_match_id (lexer, "TAPE"))
312 porfile_opts.type = PFM_TAPE;
315 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
319 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
321 lex_match (lexer, '=');
322 if (!lex_force_int (lexer))
324 porfile_opts.digits = lex_integer (lexer);
327 else if (!parse_dict_trim (lexer, dict))
330 if (!lex_match (lexer, '/'))
333 if (lex_end_of_command (lexer) != CMD_SUCCESS)
338 lex_sbc_missing (lexer, "OUTFILE");
342 dict_delete_scratch_vars (dict);
343 dict_compact_values (dict);
345 if (fh_get_referent (handle) == FH_REF_FILE)
350 writer = sfm_open_writer (handle, dict, sysfile_opts);
353 writer = pfm_open_writer (handle, dict, porfile_opts);
358 writer = any_writer_open (handle, dict);
362 map = case_map_from_dict (dict);
364 writer = casewriter_create_translator (writer,
365 case_map_get_value_cnt (map),
367 get_destroy_case_map,
374 casewriter_destroy (writer);
376 case_map_destroy (map);
380 /* SAVE and EXPORT. */
382 /* Parses and performs the SAVE or EXPORT procedure. */
384 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
386 bool retain_unselected;
387 struct variable *saved_filter_variable;
388 struct casewriter *output;
391 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
394 return CMD_CASCADING_FAILURE;
396 saved_filter_variable = dict_get_filter (dataset_dict (ds));
397 if (retain_unselected)
398 dict_set_filter (dataset_dict (ds), NULL);
400 casereader_transfer (proc_open (ds), output);
401 ok = casewriter_destroy (output);
402 ok = proc_commit (ds) && ok;
404 dict_set_filter (dataset_dict (ds), saved_filter_variable);
406 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
410 cmd_save (struct lexer *lexer, struct dataset *ds)
412 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
416 cmd_export (struct lexer *lexer, struct dataset *ds)
418 return parse_output_proc (lexer, ds, PORFILE_WRITER);
421 /* XSAVE and XEXPORT. */
423 /* Transformation. */
426 struct casewriter *writer; /* Writer. */
429 static trns_proc_func output_trns_proc;
430 static trns_free_func output_trns_free;
432 /* Parses the XSAVE or XEXPORT transformation command. */
434 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
436 struct output_trns *t = xmalloc (sizeof *t);
437 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
438 if (t->writer == NULL)
441 return CMD_CASCADING_FAILURE;
444 add_transformation (ds, output_trns_proc, output_trns_free, t);
448 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
450 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
452 struct output_trns *t = trns_;
454 case_clone (&tmp, c);
455 casewriter_write (t->writer, &tmp);
456 return TRNS_CONTINUE;
459 /* Frees an XSAVE or XEXPORT transformation.
460 Returns true if successful, false if an I/O error occurred. */
462 output_trns_free (void *trns_)
464 struct output_trns *t = trns_;
465 bool ok = casewriter_destroy (t->writer);
472 cmd_xsave (struct lexer *lexer, struct dataset *ds)
474 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
477 /* XEXPORT command. */
479 cmd_xexport (struct lexer *lexer, struct dataset *ds)
481 return parse_output_trns (lexer, ds, PORFILE_WRITER);
484 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
485 static bool drop_variables (struct lexer *, struct dictionary *dict);
486 static bool keep_variables (struct lexer *, struct dictionary *dict);
488 /* Commands that read and write system files share a great deal
489 of common syntactic structure for rearranging and dropping
490 variables. This function parses this syntax and modifies DICT
491 appropriately. Returns true on success, false on failure. */
493 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
495 if (lex_match_id (lexer, "MAP"))
500 else if (lex_match_id (lexer, "DROP"))
501 return drop_variables (lexer, dict);
502 else if (lex_match_id (lexer, "KEEP"))
503 return keep_variables (lexer, dict);
504 else if (lex_match_id (lexer, "RENAME"))
505 return rename_variables (lexer, dict);
508 lex_error (lexer, _("expecting a valid subcommand"));
513 /* Parses and performs the RENAME subcommand of GET and SAVE. */
515 rename_variables (struct lexer *lexer, struct dictionary *dict)
528 lex_match (lexer, '=');
529 if (lex_token (lexer) != '(')
533 v = parse_variable (lexer, dict);
536 if (!lex_force_match (lexer, '=')
537 || !lex_force_id (lexer))
539 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
541 msg (SE, _("Cannot rename %s as %s because there already exists "
542 "a variable named %s. To rename variables with "
543 "overlapping names, use a single RENAME subcommand "
544 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
545 "\"/RENAME (A B C=B C A)\"."),
546 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
550 dict_rename_var (dict, v, lex_tokid (lexer));
559 while (lex_match (lexer, '('))
563 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
565 if (!lex_match (lexer, '='))
567 msg (SE, _("`=' expected after variable list."));
570 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
574 msg (SE, _("Number of variables on left side of `=' (%zu) does not "
575 "match number of variables on right side (%zu), in "
576 "parenthesized group %d of RENAME subcommand."),
577 nv - old_nv, nn - old_nv, group);
580 if (!lex_force_match (lexer, ')'))
585 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
587 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
593 for (i = 0; i < nn; i++)
601 /* Parses and performs the DROP subcommand of GET and SAVE.
602 Returns true if successful, false on failure.*/
604 drop_variables (struct lexer *lexer, struct dictionary *dict)
609 lex_match (lexer, '=');
610 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
612 dict_delete_vars (dict, v, nv);
615 if (dict_get_var_cnt (dict) == 0)
617 msg (SE, _("Cannot DROP all variables from dictionary."));
623 /* Parses and performs the KEEP subcommand of GET and SAVE.
624 Returns true if successful, false on failure.*/
626 keep_variables (struct lexer *lexer, struct dictionary *dict)
632 lex_match (lexer, '=');
633 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
636 /* Move the specified variables to the beginning. */
637 dict_reorder_vars (dict, v, nv);
639 /* Delete the remaining variables. */
640 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
641 for (i = nv; i < dict_get_var_cnt (dict); i++)
642 v[i - nv] = dict_get_var (dict, i);
643 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
654 MTF_FILE, /* Specified on FILE= subcommand. */
655 MTF_TABLE /* Specified on TABLE= subcommand. */
658 /* One of the FILEs or TABLEs on MATCH FILES. */
661 struct ll ll; /* In list of all files and tables. */
666 const struct variable **by; /* List of BY variables for this file. */
667 struct mtf_variable *vars; /* Variables to copy to output. */
668 size_t var_cnt; /* Number of other variables. */
670 struct file_handle *handle; /* Input file handle. */
671 struct dictionary *dict; /* Input file dictionary. */
672 struct casereader *reader; /* Input reader. */
673 struct ccase input; /* Input record (null at end of file). */
676 char *in_name; /* Variable name. */
677 struct variable *in_var; /* Variable (in master dictionary). */
682 struct variable *in_var;
683 struct variable *out_var;
686 /* MATCH FILES procedure. */
689 struct ll_list files; /* List of "struct mtf_file"s. */
690 int nonempty_files; /* FILEs that are not at end-of-file. */
692 bool ok; /* False if I/O error occurs. */
694 struct dictionary *dict; /* Dictionary of output file. */
695 struct casewriter *output; /* MATCH FILES output. */
697 size_t by_cnt; /* Number of variables on BY subcommand. */
700 Only if "first" or "last" is nonnull are the remaining
702 struct variable *first; /* Variable specified on FIRST (if any). */
703 struct variable *last; /* Variable specified on LAST (if any). */
704 struct ccase buffered_case; /* Case ready for output except that we don't
705 know the value for the LAST variable yet. */
706 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
707 const struct variable **prev_BY; /* Last set of BY variables. */
710 static void mtf_free (struct mtf_proc *);
712 static bool mtf_close_all_files (struct mtf_proc *);
713 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
714 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
716 static void mtf_process_case (struct mtf_proc *);
718 static bool create_flag_var (const char *subcommand_name, const char *var_name,
719 struct dictionary *, struct variable **);
720 static char *var_type_description (struct variable *);
722 /* Parse and execute the MATCH FILES command. */
724 cmd_match_files (struct lexer *lexer, struct dataset *ds)
727 struct ll *first_table;
728 struct mtf_file *file, *next;
731 struct casereader *active_file = NULL;
733 char first_name[LONG_NAME_LEN + 1] = "";
734 char last_name[LONG_NAME_LEN + 1] = "";
736 struct taint *taint = NULL;
740 ll_init (&mtf.files);
741 mtf.nonempty_files = 0;
742 first_table = ll_null (&mtf.files);
743 mtf.dict = dict_create ();
746 mtf.first = mtf.last = NULL;
747 case_nullify (&mtf.buffered_case);
748 case_nullify (&mtf.prev_BY_case);
751 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
753 lex_match (lexer, '/');
754 while (lex_token (lexer) == T_ID
755 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
756 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
758 struct mtf_file *file = xmalloc (sizeof *file);
763 file->in_name = NULL;
767 case_nullify (&file->input);
769 if (lex_match_id (lexer, "FILE"))
771 file->type = MTF_FILE;
772 ll_insert (first_table, &file->ll);
773 mtf.nonempty_files++;
775 else if (lex_match_id (lexer, "TABLE"))
777 file->type = MTF_TABLE;
778 ll_push_tail (&mtf.files, &file->ll);
779 if (first_table == ll_null (&mtf.files))
780 first_table = &file->ll;
784 lex_match (lexer, '=');
786 if (lex_match (lexer, '*'))
788 if (!proc_has_active_file (ds))
790 msg (SE, _("Cannot specify the active file since no active "
791 "file has been defined."));
795 if (proc_make_temporary_transformations_permanent (ds))
797 _("MATCH FILES may not be used after TEMPORARY when "
798 "the active file is an input source. "
799 "Temporary transformations will be made permanent."));
801 file->dict = dict_clone (dataset_dict (ds));
805 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
806 if (file->handle == NULL)
809 file->reader = any_reader_open (file->handle, &file->dict);
810 if (file->reader == NULL)
814 while (lex_match (lexer, '/'))
815 if (lex_match_id (lexer, "RENAME"))
817 if (!rename_variables (lexer, file->dict))
820 else if (lex_match_id (lexer, "IN"))
822 lex_match (lexer, '=');
823 if (lex_token (lexer) != T_ID)
825 lex_error (lexer, NULL);
829 if (file->in_name != NULL)
831 msg (SE, _("Multiple IN subcommands for a single FILE or "
835 file->in_name = xstrdup (lex_tokid (lexer));
840 mtf_merge_dictionary (mtf.dict, file);
843 while (lex_token (lexer) != '.')
845 if (lex_match (lexer, T_BY))
847 struct mtf_file *file;
848 struct variable **by;
853 lex_sbc_only_once ("BY");
857 lex_match (lexer, '=');
858 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
859 PV_NO_DUPLICATE | PV_NO_SCRATCH))
863 ll_for_each (file, struct mtf_file, ll, &mtf.files)
867 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
868 for (i = 0; i < mtf.by_cnt; i++)
870 const char *var_name = var_get_name (by[i]);
871 file->by[i] = dict_lookup_var (file->dict, var_name);
872 if (file->by[i] == NULL)
874 if (file->handle != NULL)
875 msg (SE, _("File %s lacks BY variable %s."),
876 fh_get_name (file->handle), var_name);
878 msg (SE, _("Active file lacks BY variable %s."),
889 else if (lex_match_id (lexer, "FIRST"))
891 if (first_name[0] != '\0')
893 lex_sbc_only_once ("FIRST");
897 lex_match (lexer, '=');
898 if (!lex_force_id (lexer))
900 strcpy (first_name, lex_tokid (lexer));
903 else if (lex_match_id (lexer, "LAST"))
905 if (last_name[0] != '\0')
907 lex_sbc_only_once ("LAST");
911 lex_match (lexer, '=');
912 if (!lex_force_id (lexer))
914 strcpy (last_name, lex_tokid (lexer));
917 else if (lex_match_id (lexer, "MAP"))
921 else if (lex_match_id (lexer, "DROP"))
923 if (!drop_variables (lexer, mtf.dict))
926 else if (lex_match_id (lexer, "KEEP"))
928 if (!keep_variables (lexer, mtf.dict))
933 lex_error (lexer, NULL);
937 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
939 lex_end_of_command (lexer);
946 if (first_table != ll_null (&mtf.files))
948 msg (SE, _("BY is required when TABLE is specified."));
953 msg (SE, _("BY is required when IN is specified."));
958 /* Set up mapping from each file's variables to master
960 ll_for_each (file, struct mtf_file, ll, &mtf.files)
962 size_t in_var_cnt = dict_get_var_cnt (file->dict);
964 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
966 for (i = 0; i < in_var_cnt; i++)
968 struct variable *in_var = dict_get_var (file->dict, i);
969 struct variable *out_var = dict_lookup_var (mtf.dict,
970 var_get_name (in_var));
974 struct mtf_variable *mv = &file->vars[file->var_cnt++];
976 mv->out_var = out_var;
981 /* Add IN, FIRST, and LAST variables to master dictionary. */
982 ll_for_each (file, struct mtf_file, ll, &mtf.files)
983 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
985 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
986 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
989 dict_delete_scratch_vars (mtf.dict);
990 dict_compact_values (mtf.dict);
991 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
992 taint = taint_clone (casewriter_get_taint (mtf.output));
994 ll_for_each (file, struct mtf_file, ll, &mtf.files)
996 if (file->reader == NULL)
998 if (active_file == NULL)
1000 proc_discard_output (ds);
1001 file->reader = active_file = proc_open (ds);
1004 file->reader = casereader_clone (active_file);
1006 taint_propagate (casereader_get_taint (file->reader), taint);
1009 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1010 mtf_read_record (&mtf, file);
1011 while (mtf.nonempty_files > 0)
1012 mtf_process_case (&mtf);
1013 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1015 if (mtf.last != NULL)
1016 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1017 casewriter_write (mtf.output, &mtf.buffered_case);
1018 case_nullify (&mtf.buffered_case);
1020 mtf_close_all_files (&mtf);
1021 if (active_file != NULL)
1024 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1030 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1033 if (active_file != NULL)
1036 taint_destroy (taint);
1037 return CMD_CASCADING_FAILURE;
1040 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1041 attempts to create a variable named VAR_NAME, with format
1042 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1043 Returns true if successful, false if the variable name is a
1044 duplicate (in which case a message saying that the variable
1045 specified on the given SUBCOMMAND is a duplicate is emitted).
1046 Also returns true, without doing anything, if VAR_NAME is null
1049 create_flag_var (const char *subcommand, const char *var_name,
1050 struct dictionary *dict, struct variable **var)
1052 if (var_name != NULL && var_name[0] != '\0')
1054 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1055 *var = dict_create_var (dict, var_name, 0);
1058 msg (SE, _("Variable name %s specified on %s subcommand "
1059 "duplicates an existing variable name."),
1060 subcommand, var_name);
1063 var_set_both_formats (*var, &format);
1070 /* Return a string in an allocated buffer describing V's variable
1073 var_type_description (struct variable *v)
1075 if (var_is_numeric (v))
1076 return xstrdup ("numeric");
1078 return xasprintf ("string with width %d", var_get_width (v));
1081 /* Closes all the files in MTF and frees their associated data.
1082 Returns true if successful, false if an I/O error occurred on
1083 any of the files. */
1085 mtf_close_all_files (struct mtf_proc *mtf)
1087 struct mtf_file *file;
1090 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1092 casereader_destroy (file->reader);
1094 dict_destroy (file->dict);
1095 free (file->in_name);
1096 case_destroy (&file->input);
1104 /* Frees all the data for the MATCH FILES procedure. */
1106 mtf_free (struct mtf_proc *mtf)
1108 mtf_close_all_files (mtf);
1109 dict_destroy (mtf->dict);
1110 casewriter_destroy (mtf->output);
1111 case_destroy (&mtf->buffered_case);
1112 case_destroy (&mtf->prev_BY_case);
1115 /* Reads the next record into FILE, if possible, and update MTF's
1116 nonempty_files count if not. */
1118 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
1120 case_destroy (&file->input);
1121 if (!casereader_read (file->reader, &file->input))
1123 mtf->nonempty_files--;
1130 /* Compare the BY variables for files A and B; return -1 if A <
1131 B, 0 if A == B, 1 if A > B. (If there are no BY variables,
1132 then all records are equal.) */
1134 mtf_compare_BY_values (struct mtf_proc *mtf,
1135 struct mtf_file *a, struct mtf_file *b)
1137 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1140 /* Processes input files and write one case to the output file. */
1142 mtf_process_case (struct mtf_proc *mtf)
1145 struct mtf_file *min;
1146 struct mtf_file *file;
1150 /* Find the set of one or more FILEs whose BY values are
1151 minimal, as well as the set of zero or more TABLEs whose BY
1152 values equal those of the minimum FILEs.
1154 After each iteration of the loop, this invariant holds: the
1155 FILEs with minimum BY values thus far have "sequence"
1156 members equal to min_sequence, and "min" points to one of
1157 the mtf_files whose case has those minimum BY values, and
1158 similarly for TABLEs. */
1161 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1162 if (case_is_null (&file->input))
1163 file->sequence = -1;
1164 else if (file->type == MTF_FILE)
1166 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1168 file->sequence = cmp < 0 ? -1 : min_sequence;
1171 file->sequence = ++min_sequence;
1178 assert (min != NULL);
1181 cmp = mtf_compare_BY_values (mtf, min, file);
1183 while (cmp > 0 && mtf_read_record (mtf, file));
1184 file->sequence = cmp == 0 ? min_sequence : -1;
1187 /* Form the output case from the input cases. */
1188 case_create (&c, dict_get_next_value_idx (mtf->dict));
1189 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1191 struct variable *v = dict_get_var (mtf->dict, i);
1192 value_set_missing (case_data_rw (&c, v), var_get_width (v));
1194 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1196 bool include_file = file->sequence == min_sequence;
1198 for (i = 0; i < file->var_cnt; i++)
1200 const struct mtf_variable *mv = &file->vars[i];
1201 const union value *in = case_data (&file->input, mv->in_var);
1202 union value *out = case_data_rw (&c, mv->out_var);
1203 value_copy (out, in, var_get_width (mv->in_var));
1205 if (file->in_var != NULL)
1206 case_data_rw (&c, file->in_var)->f = include_file;
1209 /* Write the output case. */
1210 if (mtf->first == NULL && mtf->last == NULL)
1212 /* With no FIRST or LAST variables, it's trivial. */
1213 casewriter_write (mtf->output, &c);
1217 /* It's harder with LAST, because we can't know whether
1218 this case is the last in a group until we've prepared
1219 the *next* case also. Thus, we buffer the previous
1220 output case until the next one is ready.
1222 We also have to save a copy of one of the previous input
1223 cases, so that we can compare the BY variables. We
1224 can't compare the BY variables between the current
1225 output case and the saved one because the BY variables
1226 might not be in the output (the user is allowed to drop
1229 if (mtf->prev_BY != NULL)
1231 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1232 min->by, mtf->prev_BY,
1234 if (mtf->last != NULL)
1235 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1236 casewriter_write (mtf->output, &mtf->buffered_case);
1241 case_move (&mtf->buffered_case, &c);
1242 if (mtf->first != NULL)
1243 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1247 mtf->prev_BY = min->by;
1248 case_destroy (&mtf->prev_BY_case);
1249 case_clone (&mtf->prev_BY_case, &min->input);
1253 /* Read another record from each input file FILE with minimum
1255 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1256 if (file->type == MTF_FILE)
1258 if (file->sequence == min_sequence)
1259 mtf_read_record (mtf, file);
1265 /* Merge the dictionary for file F into master dictionary M. */
1267 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1269 struct dictionary *d = f->dict;
1270 const char *d_docs, *m_docs;
1273 if (dict_get_label (m) == NULL)
1274 dict_set_label (m, dict_get_label (d));
1276 d_docs = dict_get_documents (d);
1277 m_docs = dict_get_documents (m);
1281 dict_set_documents (m, d_docs);
1284 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1285 dict_set_documents (m, new_docs);
1290 for (i = 0; i < dict_get_var_cnt (d); i++)
1292 struct variable *dv = dict_get_var (d, i);
1293 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1295 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1300 if (var_get_width (mv) != var_get_width (dv))
1302 char *dv_description = var_type_description (dv);
1303 char *mv_description = var_type_description (mv);
1304 msg (SE, _("Variable %s in file %s (%s) has different "
1305 "type or width from the same variable in "
1306 "earlier file (%s)."),
1307 var_get_name (dv), fh_get_name (f->handle),
1308 dv_description, mv_description);
1309 free (dv_description);
1310 free (mv_description);
1314 if (var_get_width (dv) == var_get_width (mv))
1316 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1317 var_set_value_labels (mv, var_get_value_labels (dv));
1318 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1319 var_set_missing_values (mv, var_get_missing_values (dv));
1322 if (var_get_label (dv) && !var_get_label (mv))
1323 var_set_label (mv, var_get_label (dv));
1326 mv = dict_clone_var_assert (m, dv, var_get_name (dv));