1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/case-map.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/format.h>
28 #include <data/dictionary.h>
29 #include <data/por-file-writer.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/sys-file-writer.h>
33 #include <data/transformations.h>
34 #include <data/value-labels.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <language/lexer/variable-parser.h>
40 #include <libpspp/alloc.h>
41 #include <libpspp/assertion.h>
42 #include <libpspp/compiler.h>
43 #include <libpspp/hash.h>
44 #include <libpspp/message.h>
45 #include <libpspp/misc.h>
46 #include <libpspp/str.h>
47 #include <libpspp/taint.h>
50 #define _(msgid) gettext (msgid)
52 static bool parse_dict_trim (struct lexer *, struct dictionary *);
54 /* Reading system and portable files. */
56 /* Type of command. */
63 static void get_translate_case (const struct ccase *, struct ccase *,
65 static bool get_destroy_case_map (void *map_);
67 /* Parses a GET or IMPORT command.

   TYPE identifies which of the two commands is being parsed; the
   TYPE subcommand (COMM or TAPE) is recognized only for IMPORT_CMD.
   The input file is opened with any_reader_open, which is passed the
   address of DICT.  Subcommands up to the terminating period are
   handed to parse_dict_trim, and the resulting reader and dictionary
   are installed with proc_set_active_file.

   The failure path visible here destroys the reader and returns
   CMD_CASCADING_FAILURE. */
69 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
71 struct casereader *reader = NULL;
72 struct file_handle *fh = NULL;
73 struct dictionary *dict = NULL;
74 struct case_map *map = NULL;
78 lex_match (lexer, '/');
/* The FILE keyword may be omitted when the file is given as a
   string token. */
80 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
82 lex_match (lexer, '=');
84 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
88 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
90 lex_match (lexer, '=');
92 if (lex_match_id (lexer, "COMM"))
94 else if (lex_match_id (lexer, "TAPE"))
98 lex_error (lexer, _("expecting COMM or TAPE"));
108 lex_sbc_missing (lexer, "FILE");
112 reader = any_reader_open (fh, &dict);
/* Called before any trimming; presumably records the original
   layout so that case_map_from_dict below can build the map --
   TODO confirm against case-map.h. */
116 case_map_prepare_dict (dict);
118 while (lex_token (lexer) != '.')
120 lex_match (lexer, '/');
121 if (!parse_dict_trim (lexer, dict))
/* Wrap READER in a translator driven by MAP; get_destroy_case_map
   is registered to release the map. */
125 map = case_map_from_dict (dict);
127 reader = casereader_create_translator (reader,
128 dict_get_next_value_idx (dict),
130 get_destroy_case_map,
133 proc_set_active_file (ds, reader, dict);
138 casereader_destroy (reader);
141 return CMD_CASCADING_FAILURE;
/* Translation callback: treats MAP_ as a struct case_map and calls
   case_map_execute with it on INPUT and OUTPUT. */
145 get_translate_case (const struct ccase *input, struct ccase *output,
148 struct case_map *map = map_;
149 case_map_execute (map, input, output);
/* Destructor callback handed to casereader_create_translator and
   casewriter_create_translator in this file: treats MAP_ as a
   struct case_map and destroys it. */
153 get_destroy_case_map (void *map_)
155 struct case_map *map = map_;
156 case_map_destroy (map);
/* Entry point for the GET command: delegates to parse_read_command
   with GET_CMD and returns its result. */
162 cmd_get (struct lexer *lexer, struct dataset *ds)
164 return parse_read_command (lexer, ds, GET_CMD);
/* Entry point for the IMPORT command: delegates to
   parse_read_command with IMPORT_CMD and returns its result. */
169 cmd_import (struct lexer *lexer, struct dataset *ds)
171 return parse_read_command (lexer, ds, IMPORT_CMD);
174 /* Writing system and portable files. */
176 /* Type of output file. */
179 SYSFILE_WRITER, /* System file. */
180 PORFILE_WRITER /* Portable file. */
183 /* Type of a command. */
186 XFORM_CMD, /* Transformation. */
187 PROC_CMD /* Procedure. */
190 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
191 WRITER_TYPE identifies the type of file to write,
192 and COMMAND_TYPE identifies the type of command.
194 On success, returns a writer.
195 For procedures only, sets *RETAIN_UNSELECTED to true if cases
196 that would otherwise be excluded by FILTER or USE should be
included (the default, and the effect of UNSELECTED=RETAIN), or to
false if UNSELECTED=DELETE was given.  RETAIN_UNSELECTED must be
null for transformations and nonnull for procedures.
199 On failure, returns a null pointer. */
200 static struct casewriter *
201 parse_write_command (struct lexer *lexer, struct dataset *ds,
202 enum writer_type writer_type,
203 enum command_type command_type,
204 bool *retain_unselected)
207 struct file_handle *handle; /* Output file. */
208 struct dictionary *dict; /* Dictionary for output file. */
209 struct casewriter *writer; /* Writer. */
210 struct case_map *map; /* Map from input data to data for writer. */
212 /* Common options. */
213 bool print_map; /* Print map? TODO. */
214 bool print_short_names; /* Print long-to-short name map. TODO. */
215 struct sfm_write_options sysfile_opts;
216 struct pfm_write_options porfile_opts;
/* Check the argument contract: RETAIN_UNSELECTED is supplied
   exactly when the command is a procedure. */
218 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
219 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
220 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
222 if (command_type == PROC_CMD)
223 *retain_unselected = true;
/* Work on a private copy of the dataset dictionary so that
   DROP, KEEP and RENAME affect only the output file. */
226 dict = dict_clone (dataset_dict (ds));
230 print_short_names = false;
231 sysfile_opts = sfm_writer_default_options ();
232 porfile_opts = pfm_writer_default_options ();
234 case_map_prepare_dict (dict);
235 dict_delete_scratch_vars (dict);
237 lex_match (lexer, '/');
240 if (lex_match_id (lexer, "OUTFILE"))
244 lex_sbc_only_once ("OUTFILE");
248 lex_match (lexer, '=');
250 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
254 else if (lex_match_id (lexer, "NAMES"))
255 print_short_names = true;
256 else if (lex_match_id (lexer, "PERMISSIONS"))
260 lex_match (lexer, '=');
261 if (lex_match_id (lexer, "READONLY"))
263 else if (lex_match_id (lexer, "WRITEABLE"))
267 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
/* The same permission setting is applied to both option sets. */
270 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
/* UNSELECTED is accepted only for procedures. */
272 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
274 lex_match (lexer, '=');
275 if (lex_match_id (lexer, "RETAIN"))
276 *retain_unselected = true;
277 else if (lex_match_id (lexer, "DELETE"))
278 *retain_unselected = false;
281 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
/* COMPRESSED, UNCOMPRESSED and VERSION apply to system files only. */
285 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
286 sysfile_opts.compress = true;
287 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
288 sysfile_opts.compress = false;
289 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
291 lex_match (lexer, '=');
292 if (!lex_force_int (lexer))
294 sysfile_opts.version = lex_integer (lexer);
/* TYPE and DIGITS apply to portable files only. */
297 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
299 lex_match (lexer, '=');
300 if (lex_match_id (lexer, "COMMUNICATIONS"))
301 porfile_opts.type = PFM_COMM;
302 else if (lex_match_id (lexer, "TAPE"))
303 porfile_opts.type = PFM_TAPE;
306 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
310 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
312 lex_match (lexer, '=');
313 if (!lex_force_int (lexer))
315 porfile_opts.digits = lex_integer (lexer);
/* Anything else must be one of the dictionary-trimming
   subcommands handled by parse_dict_trim. */
318 else if (!parse_dict_trim (lexer, dict))
321 if (!lex_match (lexer, '/'))
324 if (lex_end_of_command (lexer) != CMD_SUCCESS)
329 lex_sbc_missing (lexer, "OUTFILE");
/* Scratch variables are deleted a second time here, after the
   trimming subcommands have run. */
333 dict_delete_scratch_vars (dict);
334 dict_compact_values (dict);
/* For FH_REF_FILE handles one of the format-specific writers is
   opened; the other visible path uses any_writer_open. */
336 if (fh_get_referent (handle) == FH_REF_FILE)
341 writer = sfm_open_writer (handle, dict, sysfile_opts);
344 writer = pfm_open_writer (handle, dict, porfile_opts);
349 writer = any_writer_open (handle, dict);
/* Wrap WRITER in a translator driven by MAP; get_destroy_case_map
   is registered to release the map. */
353 map = case_map_from_dict (dict);
355 writer = casewriter_create_translator (writer,
356 case_map_get_value_cnt (map),
358 get_destroy_case_map,
365 casewriter_destroy (writer);
367 case_map_destroy (map);
371 /* SAVE and EXPORT. */
373 /* Parses and performs the SAVE or EXPORT procedure.

   WRITER_TYPE selects the output format.  The command is parsed by
   parse_write_command; the cases obtained from proc_open are then
   transferred to the resulting writer.  Returns CMD_SUCCESS only if
   both destroying the writer and proc_commit succeed, and
   CMD_CASCADING_FAILURE otherwise. */
375 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
377 bool retain_unselected;
378 struct variable *saved_filter_variable;
379 struct casewriter *output;
382 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
385 return CMD_CASCADING_FAILURE;
/* Temporarily clear the filter variable when unselected cases are
   to be retained; it is restored after the procedure has run. */
387 saved_filter_variable = dict_get_filter (dataset_dict (ds));
388 if (retain_unselected)
389 dict_set_filter (dataset_dict (ds), NULL);
391 casereader_transfer (proc_open (ds), output);
/* proc_commit is always called, even if destroying the writer
   reported failure. */
392 ok = casewriter_destroy (output);
393 ok = proc_commit (ds) && ok;
395 dict_set_filter (dataset_dict (ds), saved_filter_variable);
397 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Entry point for the SAVE command: runs parse_output_proc with
   SYSFILE_WRITER and returns its result. */
401 cmd_save (struct lexer *lexer, struct dataset *ds)
403 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
/* Entry point for the EXPORT command: runs parse_output_proc with
   PORFILE_WRITER and returns its result. */
407 cmd_export (struct lexer *lexer, struct dataset *ds)
409 return parse_output_proc (lexer, ds, PORFILE_WRITER);
412 /* XSAVE and XEXPORT. */
414 /* Transformation. */
417 struct casewriter *writer; /* Writer. */
420 static trns_proc_func output_trns_proc;
421 static trns_free_func output_trns_free;
423 /* Parses the XSAVE or XEXPORT transformation command.

   WRITER_TYPE selects the output format.  The writer returned by
   parse_write_command is stored in a newly allocated struct
   output_trns, which is registered with add_transformation together
   with output_trns_proc and output_trns_free.  Returns
   CMD_CASCADING_FAILURE if no writer could be created. */
425 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
427 struct output_trns *t = xmalloc (sizeof *t);
/* Transformations pass a null RETAIN_UNSELECTED, as
   parse_write_command requires for XFORM_CMD. */
428 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
429 if (t->writer == NULL)
432 return CMD_CASCADING_FAILURE;
435 add_transformation (ds, output_trns_proc, output_trns_free, t);
439 /* Writes case C to the file specified on XSAVE or XEXPORT.

   TRNS_ is the struct output_trns registered by parse_output_trns.
   CASE_NUM is unused.  Always returns TRNS_CONTINUE. */
441 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
443 struct output_trns *t = trns_;
/* The writer is given a clone rather than C itself; presumably
   casewriter_write takes ownership of the case it is passed --
   TODO confirm against casewriter.h. */
445 case_clone (&tmp, c);
446 casewriter_write (t->writer, &tmp);
447 return TRNS_CONTINUE;
450 /* Frees an XSAVE or XEXPORT transformation.
451 Returns true if successful, false if an I/O error occurred.
   TRNS_ is the struct output_trns registered by parse_output_trns;
   the success flag is the result of destroying its writer. */
453 output_trns_free (void *trns_)
455 struct output_trns *t = trns_;
456 bool ok = casewriter_destroy (t->writer);
/* XSAVE command: runs parse_output_trns with SYSFILE_WRITER and
   returns its result. */
463 cmd_xsave (struct lexer *lexer, struct dataset *ds)
465 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
468 /* XEXPORT command: runs parse_output_trns with PORFILE_WRITER and
   returns its result. */
470 cmd_xexport (struct lexer *lexer, struct dataset *ds)
472 return parse_output_trns (lexer, ds, PORFILE_WRITER);
475 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
476 static bool drop_variables (struct lexer *, struct dictionary *dict);
477 static bool keep_variables (struct lexer *, struct dictionary *dict);
479 /* Commands that read and write system files share a great deal
480 of common syntactic structure for rearranging and dropping
481 variables. This function parses this syntax and modifies DICT
482 appropriately. Returns true on success, false on failure.

   The subcommand keyword is matched here; DROP, KEEP and RENAME are
   dispatched to drop_variables, keep_variables and rename_variables
   respectively, and their result is returned.  MAP is recognized as
   well.  Any other token is reported with lex_error. */
484 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
486 if (lex_match_id (lexer, "MAP"))
491 else if (lex_match_id (lexer, "DROP"))
492 return drop_variables (lexer, dict);
493 else if (lex_match_id (lexer, "KEEP"))
494 return keep_variables (lexer, dict);
495 else if (lex_match_id (lexer, "RENAME"))
496 return rename_variables (lexer, dict);
499 lex_error (lexer, _("expecting a valid subcommand"));
504 /* Parses and performs the RENAME subcommand of GET and SAVE.
   Also called for the RENAME subcommand of MATCH FILES.

   Two syntaxes are handled.  Without parentheses, a single
   OLD=NEW pair is parsed and applied with dict_rename_var, provided
   that NEW is not already in DICT.  With parentheses, each group
   lists old names, an equals sign, and the same number of new
   names; all groups are collected and applied together with
   dict_rename_vars. */
506 rename_variables (struct lexer *lexer, struct dictionary *dict)
519 lex_match (lexer, '=');
/* No opening parenthesis: the single-pair form. */
520 if (lex_token (lexer) != '(')
524 v = parse_variable (lexer, dict);
527 if (!lex_force_match (lexer, '=')
528 || !lex_force_id (lexer))
530 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
532 msg (SE, _("Cannot rename %s as %s because there already exists "
533 "a variable named %s. To rename variables with "
534 "overlapping names, use a single RENAME subcommand "
535 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
536 "\"/RENAME (A B C=B C A)\"."),
537 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
541 dict_rename_var (dict, v, lex_tokid (lexer));
/* Parenthesized form: PV_APPEND makes both lists accumulate
   across groups. */
550 while (lex_match (lexer, '('))
554 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
556 if (!lex_match (lexer, '='))
558 msg (SE, _("`=' expected after variable list."));
561 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* NOTE(review): the counts are cast to unsigned but printed with
   %d, and the right-hand count subtracts old_nv (not a separate
   old_nn) -- confirm both are intended. */
565 msg (SE, _("Number of variables on left side of `=' (%d) does not "
566 "match number of variables on right side (%d), in "
567 "parenthesized group %d of RENAME subcommand."),
568 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
571 if (!lex_force_match (lexer, ')'))
576 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
578 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
584 for (i = 0; i < nn; i++)
592 /* Parses and performs the DROP subcommand of GET and SAVE.
593 Returns true if successful, false on failure.
   Also called for the DROP subcommand of MATCH FILES.  The
   variables named on the subcommand are deleted from DICT; an error
   is reported if that leaves DICT with no variables. */
595 drop_variables (struct lexer *lexer, struct dictionary *dict)
600 lex_match (lexer, '=');
601 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
603 dict_delete_vars (dict, v, nv);
606 if (dict_get_var_cnt (dict) == 0)
608 msg (SE, _("Cannot DROP all variables from dictionary."));
614 /* Parses and performs the KEEP subcommand of GET and SAVE.
615 Returns true if successful, false on failure.
   Also called for the KEEP subcommand of MATCH FILES.  The named
   variables are moved to the front of DICT in the order given and
   every other variable is deleted. */
617 keep_variables (struct lexer *lexer, struct dictionary *dict)
623 lex_match (lexer, '=');
624 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
627 /* Move the specified variables to the beginning. */
628 dict_reorder_vars (dict, v, nv);
630 /* Delete the remaining variables. */
/* V is reused to hold the variables now at positions NV and
   beyond, which are the ones to delete. */
631 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
632 for (i = nv; i < dict_get_var_cnt (dict); i++)
633 v[i - nv] = dict_get_var (dict, i);
634 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
645 MTF_FILE, /* Specified on FILE= subcommand. */
646 MTF_TABLE /* Specified on TABLE= subcommand. */
649 /* One of the FILEs or TABLEs on MATCH FILES. */
652 struct ll ll; /* In list of all files and tables. */
657 const struct variable **by; /* List of BY variables for this file. */
658 struct mtf_variable *vars; /* Variables to copy to output. */
659 size_t var_cnt; /* Number of other variables. */
661 struct file_handle *handle; /* Input file handle. */
662 struct dictionary *dict; /* Input file dictionary. */
663 struct casereader *reader; /* Input reader. */
664 struct ccase input; /* Input record (null at end of file). */
667 char *in_name; /* Variable name. */
668 struct variable *in_var; /* Variable (in master dictionary). */
673 struct variable *in_var;
674 struct variable *out_var;
677 /* MATCH FILES procedure. */
680 struct ll_list files; /* List of "struct mtf_file"s. */
681 int nonempty_files; /* FILEs that are not at end-of-file. */
683 bool ok; /* False if I/O error occurs. */
685 struct dictionary *dict; /* Dictionary of output file. */
686 struct casewriter *output; /* MATCH FILES output. */
688 size_t by_cnt; /* Number of variables on BY subcommand. */
691 /* Only if "first" or "last" is nonnull are the remaining members used. */
693 struct variable *first; /* Variable specified on FIRST (if any). */
694 struct variable *last; /* Variable specified on LAST (if any). */
695 struct ccase buffered_case; /* Case ready for output except that we don't
696 know the value for the LAST variable yet. */
697 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
698 const struct variable **prev_BY; /* Last set of BY variables. */
701 static void mtf_free (struct mtf_proc *);
703 static bool mtf_close_all_files (struct mtf_proc *);
704 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
705 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
707 static void mtf_process_case (struct mtf_proc *);
709 static bool create_flag_var (const char *subcommand_name, const char *var_name,
710 struct dictionary *, struct variable **);
711 static char *var_type_description (struct variable *);
713 /* Parse and execute the MATCH FILES command.

   The command is parsed in two phases: first the FILE and TABLE
   subcommands, each with its own RENAME and IN subcommands, and then
   the subcommands that apply to the merged result (BY, FIRST, LAST,
   MAP, DROP, KEEP).  The inputs are then merged case by case with
   mtf_process_case and the output becomes the new active file.

   Returns CMD_SUCCESS if the taint collected from the output and
   all inputs is clean, CMD_CASCADING_FAILURE otherwise. */
715 cmd_match_files (struct lexer *lexer, struct dataset *ds)
718 struct ll *first_table;
719 struct mtf_file *file, *next;
722 struct casereader *active_file = NULL;
724 char first_name[LONG_NAME_LEN + 1] = "";
725 char last_name[LONG_NAME_LEN + 1] = "";
727 struct taint *taint = NULL;
731 ll_init (&mtf.files);
732 mtf.nonempty_files = 0;
733 first_table = ll_null (&mtf.files);
734 mtf.dict = dict_create ();
737 mtf.first = mtf.last = NULL;
738 case_nullify (&mtf.buffered_case);
739 case_nullify (&mtf.prev_BY_case);
742 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
744 lex_match (lexer, '/');
/* Phase 1: one iteration per FILE or TABLE subcommand. */
745 while (lex_token (lexer) == T_ID
746 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
747 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
749 struct mtf_file *file = xmalloc (sizeof *file);
754 file->in_name = NULL;
758 case_nullify (&file->input);
/* FILEs are inserted relative to FIRST_TABLE and TABLEs are pushed
   at the tail; presumably this keeps every FILE ahead of every
   TABLE in the list -- TODO confirm ll_insert inserts before. */
760 if (lex_match_id (lexer, "FILE"))
762 file->type = MTF_FILE;
763 ll_insert (first_table, &file->ll);
764 mtf.nonempty_files++;
766 else if (lex_match_id (lexer, "TABLE"))
768 file->type = MTF_TABLE;
769 ll_push_tail (&mtf.files, &file->ll);
770 if (first_table == ll_null (&mtf.files))
771 first_table = &file->ll;
775 lex_match (lexer, '=');
/* An asterisk designates the active file; anything else is parsed
   as a file handle and opened with any_reader_open. */
777 if (lex_match (lexer, '*'))
779 if (!proc_has_active_file (ds))
781 msg (SE, _("Cannot specify the active file since no active "
782 "file has been defined."));
786 if (proc_make_temporary_transformations_permanent (ds))
788 _("MATCH FILES may not be used after TEMPORARY when "
789 "the active file is an input source. "
790 "Temporary transformations will be made permanent."));
792 file->dict = dict_clone (dataset_dict (ds));
796 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
797 if (file->handle == NULL)
800 file->reader = any_reader_open (file->handle, &file->dict);
801 if (file->reader == NULL)
/* Per-input subcommands. */
805 while (lex_match (lexer, '/'))
806 if (lex_match_id (lexer, "RENAME"))
808 if (!rename_variables (lexer, file->dict))
811 else if (lex_match_id (lexer, "IN"))
813 lex_match (lexer, '=');
814 if (lex_token (lexer) != T_ID)
816 lex_error (lexer, NULL);
820 if (file->in_name != NULL)
822 msg (SE, _("Multiple IN subcommands for a single FILE or "
826 file->in_name = xstrdup (lex_tokid (lexer));
/* NOTE(review): mtf_merge_dictionary is declared to return bool,
   but its result is discarded here -- confirm that a failed merge
   is meant to be ignored. */
831 mtf_merge_dictionary (mtf.dict, file);
/* Phase 2: subcommands that apply to the merged dictionary. */
834 while (lex_token (lexer) != '.')
836 if (lex_match (lexer, T_BY))
838 struct mtf_file *file;
839 struct variable **by;
844 lex_sbc_only_once ("BY");
848 lex_match (lexer, '=');
849 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
850 PV_NO_DUPLICATE | PV_NO_SCRATCH))
/* Resolve each BY variable by name in every input's own
   dictionary. */
854 ll_for_each (file, struct mtf_file, ll, &mtf.files)
858 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
859 for (i = 0; i < mtf.by_cnt; i++)
861 const char *var_name = var_get_name (by[i]);
862 file->by[i] = dict_lookup_var (file->dict, var_name);
863 if (file->by[i] == NULL)
/* A null handle identifies the active file. */
865 if (file->handle != NULL)
866 msg (SE, _("File %s lacks BY variable %s."),
867 fh_get_name (file->handle), var_name);
869 msg (SE, _("Active file lacks BY variable %s."),
880 else if (lex_match_id (lexer, "FIRST"))
882 if (first_name[0] != '\0')
884 lex_sbc_only_once ("FIRST");
888 lex_match (lexer, '=');
889 if (!lex_force_id (lexer))
/* NOTE(review): unbounded copy; assumes identifier tokens never
   exceed LONG_NAME_LEN bytes -- TODO confirm. */
891 strcpy (first_name, lex_tokid (lexer));
894 else if (lex_match_id (lexer, "LAST"))
896 if (last_name[0] != '\0')
898 lex_sbc_only_once ("LAST");
902 lex_match (lexer, '=');
903 if (!lex_force_id (lexer))
905 strcpy (last_name, lex_tokid (lexer));
908 else if (lex_match_id (lexer, "MAP"))
912 else if (lex_match_id (lexer, "DROP"))
914 if (!drop_variables (lexer, mtf.dict))
917 else if (lex_match_id (lexer, "KEEP"))
919 if (!keep_variables (lexer, mtf.dict))
924 lex_error (lexer, NULL);
928 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
930 lex_end_of_command (lexer);
937 if (first_table != ll_null (&mtf.files))
939 msg (SE, _("BY is required when TABLE is specified."));
944 msg (SE, _("BY is required when IN is specified."));
949 /* Set up mapping from each file's variables to master
951 ll_for_each (file, struct mtf_file, ll, &mtf.files)
953 size_t in_var_cnt = dict_get_var_cnt (file->dict);
955 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
957 for (i = 0; i < in_var_cnt; i++)
959 struct variable *in_var = dict_get_var (file->dict, i);
960 struct variable *out_var = dict_lookup_var (mtf.dict,
961 var_get_name (in_var));
965 struct mtf_variable *mv = &file->vars[file->var_cnt++];
967 mv->out_var = out_var;
972 /* Add IN, FIRST, and LAST variables to master dictionary. */
973 ll_for_each (file, struct mtf_file, ll, &mtf.files)
974 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
976 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
977 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
980 dict_delete_scratch_vars (mtf.dict);
981 dict_compact_values (mtf.dict);
982 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
983 taint = taint_clone (casewriter_get_taint (mtf.output));
/* Inputs without a reader of their own read the active file: the
   first one opens it and later ones clone that reader.  Every
   input's taint is propagated to TAINT. */
985 ll_for_each (file, struct mtf_file, ll, &mtf.files)
987 if (file->reader == NULL)
989 if (active_file == NULL)
991 proc_discard_output (ds);
992 file->reader = active_file = proc_open (ds);
995 file->reader = casereader_clone (active_file);
997 taint_propagate (casereader_get_taint (file->reader), taint);
/* Prime every input with its first record, then merge until no
   FILE has records left. */
1000 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1001 mtf_read_record (&mtf, file);
1002 while (mtf.nonempty_files > 0)
1003 mtf_process_case (&mtf);
/* Flush the case still buffered for FIRST or LAST processing; it
   is the final case, so its LAST flag is set to 1. */
1004 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1006 if (mtf.last != NULL)
1007 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1008 casewriter_write (mtf.output, &mtf.buffered_case);
1009 case_nullify (&mtf.buffered_case);
1011 mtf_close_all_files (&mtf);
1012 if (active_file != NULL)
1015 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1021 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* Failure path. */
1024 if (active_file != NULL)
1027 taint_destroy (taint);
1028 return CMD_CASCADING_FAILURE;
1031 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1032 attempts to create a variable named VAR_NAME, with format
1033 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1034 Returns true if successful, false if the variable name is a
1035 duplicate (in which case a message saying that the variable
1036 specified on the given SUBCOMMAND is a duplicate is emitted).
1037 Also returns true, without doing anything, if VAR_NAME is null
or empty. */
1040 create_flag_var (const char *subcommand, const char *var_name,
1041 struct dictionary *dict, struct variable **var)
1043 if (var_name != NULL && var_name[0] != '\0')
1045 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1046 *var = dict_create_var (dict, var_name, 0);
/* The message names the variable first and the subcommand second,
   so the arguments are passed in that order. */
1049 msg (SE, _("Variable name %s specified on %s subcommand "
1050 "duplicates an existing variable name."),
1051 var_name, subcommand);
1054 var_set_both_formats (*var, &format);
1061 /* Return a string in an allocated buffer describing V's variable type and width. */
/* Describes V as "numeric" or as a string with its width.  The
   result comes from xstrdup or xasprintf; the caller in
   mtf_merge_dictionary releases it with free. */
1064 var_type_description (struct variable *v)
1066 if (var_is_numeric (v))
1067 return xstrdup ("numeric");
1069 return xasprintf ("string with width %d", var_get_width (v));
1072 /* Closes all the files in MTF and frees their associated data.
1073 Returns true if successful, false if an I/O error occurred on
1074 any of the files.
   Each file is removed from the list as it is visited, so MTF's
   file list is empty afterward. */
1076 mtf_close_all_files (struct mtf_proc *mtf)
1078 struct mtf_file *file;
1081 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1083 casereader_destroy (file->reader);
1085 dict_destroy (file->dict);
1086 free (file->in_name);
1087 case_destroy (&file->input);
1095 /* Frees all the data for the MATCH FILES procedure: closes the
   input files, then destroys the output dictionary, the output
   writer, and the two cases held for FIRST and LAST processing. */
1097 mtf_free (struct mtf_proc *mtf)
1099 mtf_close_all_files (mtf);
1100 dict_destroy (mtf->dict);
1101 casewriter_destroy (mtf->output);
1102 case_destroy (&mtf->buffered_case);
1103 case_destroy (&mtf->prev_BY_case);
1106 /* Reads the next record into FILE, if possible, and updates MTF's
1107 nonempty_files count if not.
   The record previously held in FILE is destroyed first. */
1109 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
1111 case_destroy (&file->input);
1112 if (!casereader_read (file->reader, &file->input))
1114 mtf->nonempty_files--;
1121 /* Compare the BY variables for files A and B; return -1 if A <
1122 B, 0 if A == B, 1 if A > B. (If there are no BY variables,
1123 then all records are equal.)
   The comparison is between the current input cases of A and B,
   each interpreted through that file's own BY variable list. */
1125 mtf_compare_BY_values (struct mtf_proc *mtf,
1126 struct mtf_file *a, struct mtf_file *b)
1128 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1131 /* Processes input files and writes one case to the output file.

   The work is done in four steps: find the inputs whose current
   records carry the minimum BY values, build an output case from
   them, write it (buffering one case when FIRST or LAST is in use),
   and advance the FILEs that contributed. */
1133 mtf_process_case (struct mtf_proc *mtf)
1136 struct mtf_file *min;
1137 struct mtf_file *file;
1141 /* Find the set of one or more FILEs whose BY values are
1142 minimal, as well as the set of zero or more TABLEs whose BY
1143 values equal those of the minimum FILEs.
1145 After each iteration of the loop, this invariant holds: the
1146 FILEs with minimum BY values thus far have "sequence"
1147 members equal to min_sequence, and "min" points to one of
1148 the mtf_files whose case has those minimum BY values, and
1149 similarly for TABLEs. */
/* A sequence of -1 marks an input that does not take part in this
   output case. */
1152 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1153 if (case_is_null (&file->input))
1154 file->sequence = -1;
1155 else if (file->type == MTF_FILE)
/* CMP compares the current minimum against FILE; with no minimum
   yet it is forced to 1. */
1157 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1159 file->sequence = cmp < 0 ? -1 : min_sequence;
/* A fresh sequence number starts a new minimal set. */
1162 file->sequence = ++min_sequence;
1169 assert (min != NULL);
/* Skip records of this input whose BY values sort before the
   minimum; it takes part only if it then matches exactly. */
1172 cmp = mtf_compare_BY_values (mtf, min, file);
1174 while (cmp > 0 && mtf_read_record (mtf, file));
1175 file->sequence = cmp == 0 ? min_sequence : -1;
1178 /* Form the output case from the input cases. */
/* Every output value starts out as missing. */
1179 case_create (&c, dict_get_next_value_idx (mtf->dict));
1180 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1182 struct variable *v = dict_get_var (mtf->dict, i);
1183 value_set_missing (case_data_rw (&c, v), var_get_width (v));
/* Inputs are visited in reverse list order; presumably so that
   values from earlier inputs overwrite those from later ones --
   TODO confirm. */
1185 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1187 bool include_file = file->sequence == min_sequence;
1189 for (i = 0; i < file->var_cnt; i++)
1191 const struct mtf_variable *mv = &file->vars[i];
1192 const union value *in = case_data (&file->input, mv->in_var);
1193 union value *out = case_data_rw (&c, mv->out_var);
1194 value_copy (out, in, var_get_width (mv->in_var));
/* The IN flag variable records whether this input contributed. */
1196 if (file->in_var != NULL)
1197 case_data_rw (&c, file->in_var)->f = include_file;
1200 /* Write the output case. */
1201 if (mtf->first == NULL && mtf->last == NULL)
1203 /* With no FIRST or LAST variables, it's trivial. */
1204 casewriter_write (mtf->output, &c);
1208 /* It's harder with LAST, because we can't know whether
1209 this case is the last in a group until we've prepared
1210 the *next* case also. Thus, we buffer the previous
1211 output case until the next one is ready.
1213 We also have to save a copy of one of the previous input
1214 cases, so that we can compare the BY variables. We
1215 can't compare the BY variables between the current
1216 output case and the saved one because the BY variables
1217 might not be in the output (the user is allowed to drop
1220 if (mtf->prev_BY != NULL)
1222 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1223 min->by, mtf->prev_BY,
1225 if (mtf->last != NULL)
1226 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1227 casewriter_write (mtf->output, &mtf->buffered_case);
1232 case_move (&mtf->buffered_case, &c);
1233 if (mtf->first != NULL)
1234 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1238 mtf->prev_BY = min->by;
1239 case_destroy (&mtf->prev_BY_case);
1240 case_clone (&mtf->prev_BY_case, &min->input);
1244 /* Read another record from each input file FILE with minimum
1246 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1247 if (file->type == MTF_FILE)
1249 if (file->sequence == min_sequence)
1250 mtf_read_record (mtf, file);
1256 /* Merge the dictionary for file F into master dictionary M. */
1258 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1260 struct dictionary *d = f->dict;
1261 const char *d_docs, *m_docs;
1264 if (dict_get_label (m) == NULL)
1265 dict_set_label (m, dict_get_label (d));
1267 d_docs = dict_get_documents (d);
1268 m_docs = dict_get_documents (m);
1272 dict_set_documents (m, d_docs);
1275 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1276 dict_set_documents (m, new_docs);
1281 for (i = 0; i < dict_get_var_cnt (d); i++)
1283 struct variable *dv = dict_get_var (d, i);
1284 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1286 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1291 if (var_get_width (mv) != var_get_width (dv))
1293 char *dv_description = var_type_description (dv);
1294 char *mv_description = var_type_description (mv);
1295 msg (SE, _("Variable %s in file %s (%s) has different "
1296 "type or width from the same variable in "
1297 "earlier file (%s)."),
1298 var_get_name (dv), fh_get_name (f->handle),
1299 dv_description, mv_description);
1300 free (dv_description);
1301 free (mv_description);
1305 if (var_get_width (dv) == var_get_width (mv))
1307 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1308 var_set_value_labels (mv, var_get_value_labels (dv));
1309 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1310 var_set_missing_values (mv, var_get_missing_values (dv));
1313 if (var_get_label (dv) && !var_get_label (mv))
1314 var_set_label (mv, var_get_label (dv));
1317 mv = dict_clone_var_assert (m, dv, var_get_name (dv));