1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/case-map.h>
25 #include <data/casereader.h>
26 #include <data/casewriter.h>
27 #include <data/format.h>
28 #include <data/dictionary.h>
29 #include <data/por-file-writer.h>
30 #include <data/procedure.h>
31 #include <data/settings.h>
32 #include <data/sys-file-writer.h>
33 #include <data/transformations.h>
34 #include <data/value-labels.h>
35 #include <data/variable.h>
36 #include <language/command.h>
37 #include <language/data-io/file-handle.h>
38 #include <language/lexer/lexer.h>
39 #include <language/lexer/variable-parser.h>
40 #include <libpspp/alloc.h>
41 #include <libpspp/assertion.h>
42 #include <libpspp/compiler.h>
43 #include <libpspp/hash.h>
44 #include <libpspp/message.h>
45 #include <libpspp/misc.h>
46 #include <libpspp/str.h>
47 #include <libpspp/taint.h>
50 #define _(msgid) gettext (msgid)
52 static bool parse_dict_trim (struct lexer *, struct dictionary *);
54 /* Reading system and portable files. */
56 /* Type of command. */
63 static void get_translate_case (struct ccase *, struct ccase *, void *map_);
64 static bool get_destroy_case_map (void *map_);
/* Shared implementation of GET and IMPORT, selected by TYPE.
   Visible flow:
   - FILE subcommand: the FILE keyword may be omitted when the next
     token is a string; the handle may name a file or a scratch file.
   - IMPORT only: TYPE accepts COMM or TAPE, anything else is an error.
   - FILE is mandatory (lex_sbc_missing is reported otherwise).
   - The file is opened with any_reader_open, which also yields DICT.
   - Until the end of the command, each subcommand is handed to
     parse_dict_trim to drop, keep or rename variables in DICT.
   - DICT is compacted and a case map built from it; the reader is
     wrapped in a translator so that cases match the trimmed DICT.
   - The translated reader and DICT become the active file.
   On the failure path the reader is destroyed and
   CMD_CASCADING_FAILURE is returned. */
66 /* Parses a GET or IMPORT command. */
68 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
70 struct casereader *reader = NULL;
71 struct file_handle *fh = NULL;
72 struct dictionary *dict = NULL;
73 struct case_map *map = NULL;
77 lex_match (lexer, '/');
79 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
81 lex_match (lexer, '=');
83 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
87 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
89 lex_match (lexer, '=');
91 if (lex_match_id (lexer, "COMM"))
93 else if (lex_match_id (lexer, "TAPE"))
97 lex_error (lexer, _("expecting COMM or TAPE"));
107 lex_sbc_missing (lexer, "FILE");
111 reader = any_reader_open (fh, &dict);
115 case_map_prepare_dict (dict);
117 while (lex_token (lexer) != '.')
119 lex_match (lexer, '/');
120 if (!parse_dict_trim (lexer, dict))
123 dict_compact_values (dict);
125 map = case_map_from_dict (dict);
127 reader = casereader_create_translator (reader,
128 dict_get_next_value_idx (dict),
130 get_destroy_case_map,
133 proc_set_active_file (ds, reader, dict);
138 casereader_destroy (reader);
141 return CMD_CASCADING_FAILURE;
/* Translates one case: runs INPUT through the case map passed as
   MAP_ to fill in OUTPUT, then destroys INPUT. */
static void
get_translate_case (struct ccase *input, struct ccase *output, void *map_)
{
  /* MAP_ is the struct case_map supplied when the translator was
     created; the void pointer converts implicitly. */
  case_map_execute (map_, input, output);
  case_destroy (input);
}
/* Destructor callback handed to casereader_create_translator and
   casewriter_create_translator: MAP_ is the struct case_map that the
   translator was using, and it is destroyed here. */
154 get_destroy_case_map (void *map_)
156 struct case_map *map = map_;
157 case_map_destroy (map);
/* GET command: delegates to parse_read_command in GET_CMD mode and
   returns its result. */
163 cmd_get (struct lexer *lexer, struct dataset *ds)
165 return parse_read_command (lexer, ds, GET_CMD);
/* IMPORT command: delegates to parse_read_command in IMPORT_CMD mode,
   which additionally accepts the TYPE subcommand. */
170 cmd_import (struct lexer *lexer, struct dataset *ds)
172 return parse_read_command (lexer, ds, IMPORT_CMD);
175 /* Writing system and portable files. */
/* Passed to parse_write_command by the SAVE/XSAVE entry points
   (SYSFILE_WRITER) and the EXPORT/XEXPORT entry points
   (PORFILE_WRITER).  It also gates which subcommands are accepted:
   COMPRESSED, UNCOMPRESSED and VERSION for system files, TYPE and
   DIGITS for portable files. */
177 /* Type of output file. */
180 SYSFILE_WRITER, /* System file. */
181 PORFILE_WRITER /* Portable file. */
/* XFORM_CMD is passed by parse_output_trns (XSAVE, XEXPORT) and
   PROC_CMD by parse_output_proc (SAVE, EXPORT).  Only PROC_CMD
   accepts the UNSELECTED subcommand in parse_write_command. */
184 /* Type of a command. */
187 XFORM_CMD, /* Transformation. */
188 PROC_CMD /* Procedure. */
/* Shared parser for SAVE, XSAVE, EXPORT and XEXPORT.

   The function works on a clone of the dataset dictionary, so the
   variable trimming done through parse_dict_trim affects only the
   output file.  Scratch variables are removed from the clone.

   Subcommands recognised in the visible code:
   - OUTFILE: file or scratch handle; required (lex_sbc_missing is
     reported when it is absent).
   - NAMES: sets print_short_names.
   - PERMISSIONS: READONLY or WRITEABLE, applied to both the system
     file and portable file option sets.
   - UNSELECTED: RETAIN or DELETE; accepted only for PROC_CMD.
   - COMPRESSED, UNCOMPRESSED, VERSION: system files only.
   - TYPE (COMMUNICATIONS or TAPE), DIGITS: portable files only.
   - Anything else is handed to parse_dict_trim.

   For a handle that refers to a file, the writer comes from
   sfm_open_writer or pfm_open_writer; otherwise any_writer_open is
   used.  The writer is then wrapped in a translator that remaps
   each case through a case map built from the trimmed dictionary.

   *RETAIN_UNSELECTED must be non-null exactly when COMMAND_TYPE is
   PROC_CMD (asserted below) and defaults to true. */
191 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
192 WRITER_TYPE identifies the type of file to write,
193 and COMMAND_TYPE identifies the type of command.
195 On success, returns a writer.
196 For procedures only, sets *RETAIN_UNSELECTED to true if cases
197 that would otherwise be excluded by FILTER or USE should be
200 On failure, returns a null pointer. */
201 static struct casewriter *
202 parse_write_command (struct lexer *lexer, struct dataset *ds,
203 enum writer_type writer_type,
204 enum command_type command_type,
205 bool *retain_unselected)
208 struct file_handle *handle; /* Output file. */
209 struct dictionary *dict; /* Dictionary for output file. */
210 struct casewriter *writer; /* Writer. */
211 struct case_map *map; /* Map from input data to data for writer. */
213 /* Common options. */
214 bool print_map; /* Print map? TODO. */
215 bool print_short_names; /* Print long-to-short name map. TODO. */
216 struct sfm_write_options sysfile_opts;
217 struct pfm_write_options porfile_opts;
219 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
220 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
221 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
223 if (command_type == PROC_CMD)
224 *retain_unselected = true;
227 dict = dict_clone (dataset_dict (ds));
231 print_short_names = false;
232 sysfile_opts = sfm_writer_default_options ();
233 porfile_opts = pfm_writer_default_options ();
235 case_map_prepare_dict (dict);
236 dict_delete_scratch_vars (dict);
238 lex_match (lexer, '/');
241 if (lex_match_id (lexer, "OUTFILE"))
245 lex_sbc_only_once ("OUTFILE");
249 lex_match (lexer, '=');
251 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
255 else if (lex_match_id (lexer, "NAMES"))
256 print_short_names = true;
257 else if (lex_match_id (lexer, "PERMISSIONS"))
261 lex_match (lexer, '=');
262 if (lex_match_id (lexer, "READONLY"))
264 else if (lex_match_id (lexer, "WRITEABLE"))
268 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
271 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
273 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
275 lex_match (lexer, '=');
276 if (lex_match_id (lexer, "RETAIN"))
277 *retain_unselected = true;
278 else if (lex_match_id (lexer, "DELETE"))
279 *retain_unselected = false;
282 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
286 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
287 sysfile_opts.compress = true;
288 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
289 sysfile_opts.compress = false;
290 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
292 lex_match (lexer, '=');
293 if (!lex_force_int (lexer))
295 sysfile_opts.version = lex_integer (lexer);
298 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
300 lex_match (lexer, '=');
301 if (lex_match_id (lexer, "COMMUNICATIONS"))
302 porfile_opts.type = PFM_COMM;
303 else if (lex_match_id (lexer, "TAPE"))
304 porfile_opts.type = PFM_TAPE;
307 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
311 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
313 lex_match (lexer, '=');
314 if (!lex_force_int (lexer))
316 porfile_opts.digits = lex_integer (lexer);
319 else if (!parse_dict_trim (lexer, dict))
322 if (!lex_match (lexer, '/'))
325 if (lex_end_of_command (lexer) != CMD_SUCCESS)
330 lex_sbc_missing (lexer, "OUTFILE");
334 dict_delete_scratch_vars (dict);
335 dict_compact_values (dict);
337 if (fh_get_referent (handle) == FH_REF_FILE)
342 writer = sfm_open_writer (handle, dict, sysfile_opts);
345 writer = pfm_open_writer (handle, dict, porfile_opts);
350 writer = any_writer_open (handle, dict);
354 map = case_map_from_dict (dict);
356 writer = casewriter_create_translator (writer,
357 case_map_get_value_cnt (map),
359 get_destroy_case_map,
366 casewriter_destroy (writer);
368 case_map_destroy (map);
372 /* SAVE and EXPORT. */
/* Runs SAVE or EXPORT as a procedure.
   After parse_write_command succeeds, the dictionary's filter
   variable is saved; if unselected cases are to be retained the
   filter is cleared while the data is passed.  Every case obtained
   from proc_open is transferred to the writer.  The writer is then
   destroyed and the procedure committed; both steps always run, and
   the command succeeds only if both report success.  The saved
   filter variable is restored before returning. */
374 /* Parses and performs the SAVE or EXPORT procedure. */
376 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
378 bool retain_unselected;
379 struct variable *saved_filter_variable;
380 struct casewriter *output;
383 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
386 return CMD_CASCADING_FAILURE;
388 saved_filter_variable = dict_get_filter (dataset_dict (ds));
389 if (retain_unselected)
390 dict_set_filter (dataset_dict (ds), NULL);
392 casereader_transfer (proc_open (ds), output);
393 ok = casewriter_destroy (output);
394 ok = proc_commit (ds) && ok;
396 dict_set_filter (dataset_dict (ds), saved_filter_variable);
398 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* SAVE command: runs parse_output_proc with a system file writer. */
402 cmd_save (struct lexer *lexer, struct dataset *ds)
404 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
/* EXPORT command: runs parse_output_proc with a portable file writer. */
408 cmd_export (struct lexer *lexer, struct dataset *ds)
410 return parse_output_proc (lexer, ds, PORFILE_WRITER);
413 /* XSAVE and XEXPORT. */
/* State of one XSAVE or XEXPORT transformation: the writer that
   output_trns_proc sends each case to and that output_trns_free
   destroys. */
415 /* Transformation. */
418 struct casewriter *writer; /* Writer. */
421 static trns_proc_func output_trns_proc;
422 static trns_free_func output_trns_free;
/* Allocates the transformation state, then parses the command with
   parse_write_command in XFORM_CMD mode (which takes no
   RETAIN_UNSELECTED output, hence the null pointer).  If no writer
   could be opened the command fails with CMD_CASCADING_FAILURE;
   otherwise output_trns_proc and output_trns_free are registered on
   DS as a transformation that owns T. */
424 /* Parses the XSAVE or XEXPORT transformation command. */
426 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
428 struct output_trns *t = xmalloc (sizeof *t);
429 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
430 if (t->writer == NULL)
433 return CMD_CASCADING_FAILURE;
436 add_transformation (ds, output_trns_proc, output_trns_free, t);
/* Transformation callback.  TRNS_ is the struct output_trns set up
   by parse_output_trns.  A clone of C, not C itself, is handed to
   the writer -- presumably because casewriter_write takes over the
   case it is given while C must stay usable by the caller (confirm
   against the casewriter API).  Always returns TRNS_CONTINUE. */
440 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
442 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
444 struct output_trns *t = trns_;
446 case_clone (&tmp, c);
447 casewriter_write (t->writer, &tmp);
448 return TRNS_CONTINUE;
/* Transformation destructor.  TRNS_ is the struct output_trns set up
   by parse_output_trns.  Destroying the writer is what reports any
   I/O error, so its result is kept in OK. */
451 /* Frees an XSAVE or XEXPORT transformation.
452 Returns true if successful, false if an I/O error occurred. */
454 output_trns_free (void *trns_)
456 struct output_trns *t = trns_;
457 bool ok = casewriter_destroy (t->writer);
/* XSAVE command: installs an output transformation that writes a
   system file. */
464 cmd_xsave (struct lexer *lexer, struct dataset *ds)
466 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
/* Installs an output transformation that writes a portable file. */
469 /* XEXPORT command. */
471 cmd_xexport (struct lexer *lexer, struct dataset *ds)
473 return parse_output_trns (lexer, ds, PORFILE_WRITER);
476 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
477 static bool drop_variables (struct lexer *, struct dictionary *dict);
478 static bool keep_variables (struct lexer *, struct dictionary *dict);
/* Dispatches on the subcommand keyword: MAP is recognised, and DROP,
   KEEP and RENAME are forwarded to drop_variables, keep_variables
   and rename_variables, whose result is returned.  Any other token
   is reported through lex_error as an invalid subcommand. */
480 /* Commands that read and write system files share a great deal
481 of common syntactic structure for rearranging and dropping
482 variables. This function parses this syntax and modifies DICT
483 appropriately. Returns true on success, false on failure. */
485 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
487 if (lex_match_id (lexer, "MAP"))
492 else if (lex_match_id (lexer, "DROP"))
493 return drop_variables (lexer, dict);
494 else if (lex_match_id (lexer, "KEEP"))
495 return keep_variables (lexer, dict);
496 else if (lex_match_id (lexer, "RENAME"))
497 return rename_variables (lexer, dict);
500 lex_error (lexer, _("expecting a valid subcommand"));
/* Two syntaxes are handled:

   1. No parenthesis after the optional `=': a single OLD=NEW pair.
      The rename is refused if NEW already names a variable in DICT.

   2. One or more parenthesized groups, each `(old-list = new-list)'.
      Old and new names are accumulated across groups (PV_APPEND)
      and all renames are applied together by dict_rename_vars,
      which is why overlapping renames such as (A=B)(B=C)(C=A) work
      in this form but not in form 1.  Each group must have as many
      new names as old names.

   The new-name strings collected in form 2 are walked in the final
   loop over NN entries. */
505 /* Parses and performs the RENAME subcommand of GET and SAVE. */
507 rename_variables (struct lexer *lexer, struct dictionary *dict)
520 lex_match (lexer, '=');
521 if (lex_token (lexer) != '(')
525 v = parse_variable (lexer, dict);
528 if (!lex_force_match (lexer, '=')
529 || !lex_force_id (lexer))
531 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
533 msg (SE, _("Cannot rename %s as %s because there already exists "
534 "a variable named %s. To rename variables with "
535 "overlapping names, use a single RENAME subcommand "
536 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
537 "\"/RENAME (A B C=B C A)\"."),
538 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
542 dict_rename_var (dict, v, lex_tokid (lexer));
551 while (lex_match (lexer, '('))
555 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
557 if (!lex_match (lexer, '='))
559 msg (SE, _("`=' expected after variable list."));
562 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
/* NOTE(review): the counts below are cast to unsigned but printed
   with %d; the conversions and the argument types should agree
   (either %u, or cast to int). */
566 msg (SE, _("Number of variables on left side of `=' (%d) does not "
567 "match number of variables on right side (%d), in "
568 "parenthesized group %d of RENAME subcommand."),
569 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
572 if (!lex_force_match (lexer, ')'))
577 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
579 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
585 for (i = 0; i < nn; i++)
/* After an optional `=', parses a variable list against DICT and
   deletes those variables.  Dropping every variable is an error:
   it is reported once DICT is found to be empty afterwards. */
593 /* Parses and performs the DROP subcommand of GET and SAVE.
594 Returns true if successful, false on failure.*/
596 drop_variables (struct lexer *lexer, struct dictionary *dict)
601 lex_match (lexer, '=');
602 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
604 dict_delete_vars (dict, v, nv);
607 if (dict_get_var_cnt (dict) == 0)
609 msg (SE, _("Cannot DROP all variables from dictionary."));
/* After an optional `=', parses the list of variables to keep.
   Those NV variables are moved to the front of DICT, so that every
   variable at index NV or above is one to discard.  The V array is
   then resized and reused to hold exactly those trailing variables,
   which are deleted in one call. */
615 /* Parses and performs the KEEP subcommand of GET and SAVE.
616 Returns true if successful, false on failure.*/
618 keep_variables (struct lexer *lexer, struct dictionary *dict)
624 lex_match (lexer, '=');
625 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
628 /* Move the specified variables to the beginning. */
629 dict_reorder_vars (dict, v, nv);
631 /* Delete the remaining variables. */
632 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
633 for (i = nv; i < dict_get_var_cnt (dict); i++)
634 v[i - nv] = dict_get_var (dict, i);
635 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* Kind of MATCH FILES input.  cmd_match_files inserts each MTF_FILE
   ahead of the first MTF_TABLE and appends each MTF_TABLE, so its
   list holds all FILEs before all TABLEs. */
646 MTF_FILE, /* Specified on FILE= subcommand. */
647 MTF_TABLE /* Specified on TABLE= subcommand. */
/* Notes on how cmd_match_files uses these members:
   - `handle' is tested against null to tell a named file from the
     active file when reporting a missing BY variable.
   - `reader' is still null for the active file when the data pass
     starts; it is then obtained from proc_open or cloned from the
     reader already opened for the active file.
   - `in_name' is the name given on IN; `in_var' receives the flag
     variable that create_flag_var adds to the master dictionary. */
650 /* One of the FILEs or TABLEs on MATCH FILES. */
653 struct ll ll; /* In list of all files and tables. */
658 const struct variable **by; /* List of BY variables for this file. */
659 struct mtf_variable *vars; /* Variables to copy to output. */
660 size_t var_cnt; /* Number of other variables. */
662 struct file_handle *handle; /* Input file handle. */
663 struct dictionary *dict; /* Input file dictionary. */
664 struct casereader *reader; /* Input reader. */
665 struct ccase input; /* Input record (null at end of file). */
668 char *in_name; /* Variable name. */
669 struct variable *in_var; /* Variable (in master dictionary). */
/* Pairs a variable of an input file (in_var, looked up in the input
   case) with the variable of the same name in the master dictionary
   (out_var, written in the output case). */
674 struct variable *in_var;
675 struct variable *out_var;
/* Whole-procedure state shared by cmd_match_files and its helpers.
   `nonempty_files' drives the main loop in cmd_match_files, which
   calls mtf_process_case while it is positive.  `buffered_case',
   `prev_BY_case' and `prev_BY' are used by mtf_process_case only
   when FIRST or LAST was requested. */
678 /* MATCH FILES procedure. */
681 struct ll_list files; /* List of "struct mtf_file"s. */
682 int nonempty_files; /* FILEs that are not at end-of-file. */
684 bool ok; /* False if I/O error occurs. */
686 struct dictionary *dict; /* Dictionary of output file. */
687 struct casewriter *output; /* MATCH FILES output. */
689 size_t by_cnt; /* Number of variables on BY subcommand. */
692 Only if "first" or "last" is nonnull are the remaining
694 struct variable *first; /* Variable specified on FIRST (if any). */
695 struct variable *last; /* Variable specified on LAST (if any). */
696 struct ccase buffered_case; /* Case ready for output except that we don't
697 know the value for the LAST variable yet. */
698 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
699 const struct variable **prev_BY; /* Last set of BY variables. */
702 static void mtf_free (struct mtf_proc *);
704 static bool mtf_close_all_files (struct mtf_proc *);
705 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
706 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
708 static void mtf_process_case (struct mtf_proc *);
710 static bool create_flag_var (const char *subcommand_name, const char *var_name,
711 struct dictionary *, struct variable **);
712 static char *var_type_description (struct variable *);
/* Overview of the visible flow:

   1. Inputs.  Each FILE= or TABLE= subcommand creates a struct
      mtf_file.  FILEs are inserted ahead of the first TABLE and
      TABLEs are appended, so the list holds all FILEs before all
      TABLEs.  `*' selects the active file, whose dictionary is
      cloned; otherwise the named file is opened with
      any_reader_open.  Per-input RENAME and IN subcommands follow,
      and the input's dictionary is merged into the master
      dictionary.

   2. Global subcommands, until the end of the command: BY, FIRST,
      LAST, MAP, DROP and KEEP.  For BY, each input must contain
      every BY variable.  BY is required when a TABLE or an IN
      variable was specified.

   3. Each input variable is paired with the master variable of the
      same name, the IN/FIRST/LAST flag variables are created, and
      the master dictionary is compacted.

   4. Data pass.  Inputs that use the active file share one reader
      from proc_open (cloned for any further use).  One record is
      read from every input, mtf_process_case runs while any FILE
      still has data, and the case buffered for FIRST/LAST is
      flushed with LAST set to 1.  The output becomes the new active
      file with the master dictionary.

   Every input reader's taint is propagated to a taint cloned from
   the output writer; the command succeeds only if taint_destroy
   reports success. */
714 /* Parse and execute the MATCH FILES command. */
716 cmd_match_files (struct lexer *lexer, struct dataset *ds)
719 struct ll *first_table;
720 struct mtf_file *file, *next;
723 struct casereader *active_file = NULL;
725 char first_name[LONG_NAME_LEN + 1] = "";
726 char last_name[LONG_NAME_LEN + 1] = "";
728 struct taint *taint = NULL;
732 ll_init (&mtf.files);
733 mtf.nonempty_files = 0;
734 first_table = ll_null (&mtf.files);
735 mtf.dict = dict_create ();
738 mtf.first = mtf.last = NULL;
739 case_nullify (&mtf.buffered_case);
740 case_nullify (&mtf.prev_BY_case);
743 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
745 lex_match (lexer, '/');
746 while (lex_token (lexer) == T_ID
747 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
748 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
750 struct mtf_file *file = xmalloc (sizeof *file);
755 file->in_name = NULL;
759 case_nullify (&file->input);
761 if (lex_match_id (lexer, "FILE"))
763 file->type = MTF_FILE;
764 ll_insert (first_table, &file->ll);
765 mtf.nonempty_files++;
767 else if (lex_match_id (lexer, "TABLE"))
769 file->type = MTF_TABLE;
770 ll_push_tail (&mtf.files, &file->ll);
771 if (first_table == ll_null (&mtf.files))
772 first_table = &file->ll;
776 lex_match (lexer, '=');
778 if (lex_match (lexer, '*'))
780 if (!proc_has_active_file (ds))
782 msg (SE, _("Cannot specify the active file since no active "
783 "file has been defined."));
787 if (proc_make_temporary_transformations_permanent (ds))
789 _("MATCH FILES may not be used after TEMPORARY when "
790 "the active file is an input source. "
791 "Temporary transformations will be made permanent."));
793 file->dict = dict_clone (dataset_dict (ds));
797 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
798 if (file->handle == NULL)
801 file->reader = any_reader_open (file->handle, &file->dict);
802 if (file->reader == NULL)
806 while (lex_match (lexer, '/'))
807 if (lex_match_id (lexer, "RENAME"))
809 if (!rename_variables (lexer, file->dict))
812 else if (lex_match_id (lexer, "IN"))
814 lex_match (lexer, '=');
815 if (lex_token (lexer) != T_ID)
817 lex_error (lexer, NULL);
821 if (file->in_name != NULL)
823 msg (SE, _("Multiple IN subcommands for a single FILE or "
827 file->in_name = xstrdup (lex_tokid (lexer));
/* NOTE(review): mtf_merge_dictionary is declared to return bool, but
   its result is discarded here, so a failed merge does not stop
   parsing at this point -- confirm that this is intended. */
832 mtf_merge_dictionary (mtf.dict, file);
835 while (lex_token (lexer) != '.')
837 if (lex_match (lexer, T_BY))
839 struct mtf_file *file;
840 struct variable **by;
845 lex_sbc_only_once ("BY");
849 lex_match (lexer, '=');
850 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
851 PV_NO_DUPLICATE | PV_NO_SCRATCH))
855 ll_for_each (file, struct mtf_file, ll, &mtf.files)
859 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
860 for (i = 0; i < mtf.by_cnt; i++)
862 const char *var_name = var_get_name (by[i]);
863 file->by[i] = dict_lookup_var (file->dict, var_name);
864 if (file->by[i] == NULL)
866 if (file->handle != NULL)
867 msg (SE, _("File %s lacks BY variable %s."),
868 fh_get_name (file->handle), var_name);
870 msg (SE, _("Active file lacks BY variable %s."),
881 else if (lex_match_id (lexer, "FIRST"))
883 if (first_name[0] != '\0')
885 lex_sbc_only_once ("FIRST");
889 lex_match (lexer, '=');
890 if (!lex_force_id (lexer))
892 strcpy (first_name, lex_tokid (lexer));
895 else if (lex_match_id (lexer, "LAST"))
897 if (last_name[0] != '\0')
899 lex_sbc_only_once ("LAST");
903 lex_match (lexer, '=');
904 if (!lex_force_id (lexer))
906 strcpy (last_name, lex_tokid (lexer));
909 else if (lex_match_id (lexer, "MAP"))
913 else if (lex_match_id (lexer, "DROP"))
915 if (!drop_variables (lexer, mtf.dict))
918 else if (lex_match_id (lexer, "KEEP"))
920 if (!keep_variables (lexer, mtf.dict))
925 lex_error (lexer, NULL);
929 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
931 lex_end_of_command (lexer);
938 if (first_table != ll_null (&mtf.files))
940 msg (SE, _("BY is required when TABLE is specified."));
945 msg (SE, _("BY is required when IN is specified."));
950 /* Set up mapping from each file's variables to master
952 ll_for_each (file, struct mtf_file, ll, &mtf.files)
954 size_t in_var_cnt = dict_get_var_cnt (file->dict);
956 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
958 for (i = 0; i < in_var_cnt; i++)
960 struct variable *in_var = dict_get_var (file->dict, i);
961 struct variable *out_var = dict_lookup_var (mtf.dict,
962 var_get_name (in_var));
966 struct mtf_variable *mv = &file->vars[file->var_cnt++];
968 mv->out_var = out_var;
973 /* Add IN, FIRST, and LAST variables to master dictionary. */
974 ll_for_each (file, struct mtf_file, ll, &mtf.files)
975 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
977 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
978 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
981 dict_delete_scratch_vars (mtf.dict);
982 dict_compact_values (mtf.dict);
983 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
984 taint = taint_clone (casewriter_get_taint (mtf.output));
986 ll_for_each (file, struct mtf_file, ll, &mtf.files)
988 if (file->reader == NULL)
990 if (active_file == NULL)
992 proc_discard_output (ds);
993 file->reader = active_file = proc_open (ds);
996 file->reader = casereader_clone (active_file);
998 taint_propagate (casereader_get_taint (file->reader), taint);
1001 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1002 mtf_read_record (&mtf, file);
1003 while (mtf.nonempty_files > 0)
1004 mtf_process_case (&mtf);
1005 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1007 if (mtf.last != NULL)
1008 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1009 casewriter_write (mtf.output, &mtf.buffered_case);
1010 case_nullify (&mtf.buffered_case);
1012 mtf_close_all_files (&mtf);
1013 if (active_file != NULL)
1016 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1022 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1025 if (active_file != NULL)
1028 taint_destroy (taint);
1029 return CMD_CASCADING_FAILURE;
1032 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1033 attempts to create a variable named VAR_NAME, with format
1034 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1035 Returns true if successful, false if the variable name is a
1036 duplicate (in which case a message saying that the variable
1037 specified on the given SUBCOMMAND is a duplicate is emitted).
1038 Also returns true, without doing anything, if VAR_NAME is null
1041 create_flag_var (const char *subcommand, const char *var_name,
1042 struct dictionary *dict, struct variable **var)
1044 if (var_name != NULL && var_name[0] != '\0')
1046 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1047 *var = dict_create_var (dict, var_name, 0);
1050 msg (SE, _("Variable name %s specified on %s subcommand "
1051 "duplicates an existing variable name."),
1052 subcommand, var_name);
1055 var_set_both_formats (*var, &format);
/* Returns a newly allocated string that describes the type of V:
   "numeric" for a numeric variable, otherwise "string with width N"
   where N is V's width.  The caller must free the string. */
static char *
var_type_description (struct variable *v)
{
  return (var_is_numeric (v)
          ? xstrdup ("numeric")
          : xasprintf ("string with width %d", var_get_width (v)));
}
/* Each file is unlinked from MTF's list as it is visited
   (ll_for_each_preremove), and its reader, dictionary, IN variable
   name and buffered input case are released. */
1073 /* Closes all the files in MTF and frees their associated data.
1074 Returns true if successful, false if an I/O error occurred on
1075 any of the files. */
1077 mtf_close_all_files (struct mtf_proc *mtf)
1079 struct mtf_file *file;
1082 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1084 casereader_destroy (file->reader);
1086 dict_destroy (file->dict);
1087 free (file->in_name);
1088 case_destroy (&file->input);
1096 /* Frees all the data for the MATCH FILES procedure. */
1098 mtf_free (struct mtf_proc *mtf)
1100 mtf_close_all_files (mtf);
1101 dict_destroy (mtf->dict);
1102 casewriter_destroy (mtf->output);
1103 case_destroy (&mtf->buffered_case);
1104 case_destroy (&mtf->prev_BY_case);
/* The previous input case of FILE is destroyed before the read.
   NOTE(review): in the visible code the counter is decremented for
   whichever input reaches end of file, while cmd_match_files
   increments it only for FILE= inputs and also calls this function
   for TABLE= inputs -- confirm that a TABLE reaching end of file
   cannot end the merge early. */
1107 /* Reads the next record into FILE, if possible, and update MTF's
1108 nonempty_files count if not. */
1110 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
1112 case_destroy (&file->input);
1113 if (!casereader_read (file->reader, &file->input))
1115 mtf->nonempty_files--;
/* Thin wrapper around case_compare_2dict: compares the current input
   cases of A and B, each through its own list of BY variables, over
   the MTF->by_cnt BY variables. */
1122 /* Compare the BY variables for files A and B; return -1 if A <
1123 B, 0 if A == B, 1 if A > B. (If there are no BY variables,
1124 then all records are equal.) */
1126 mtf_compare_BY_values (struct mtf_proc *mtf,
1127 struct mtf_file *a, struct mtf_file *b)
1129 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
/* One step of the merge, as far as the visible code shows:

   1. Selection.  Inputs whose case is null get sequence -1.  Among
      FILE inputs, those with the smallest BY values are tagged with
      the current min_sequence; a new, smaller minimum starts a new
      sequence number.  Each TABLE is read forward while the minimum
      compares greater than its current record, and is tagged only
      when its BY values equal the minimum.

   2. Output case.  A case sized for the master dictionary is
      created with every variable set to missing.  The inputs are
      then visited in reverse list order and their variables copied
      into it, so values from earlier inputs end up on top.
      (Presumably only inputs tagged with min_sequence are copied;
      the guarding line is not visible here -- TODO confirm.)  Each
      input's IN variable, if any, records whether that input
      contributed.

   3. Writing.  Without FIRST and LAST the case is written
      directly.  Otherwise the previous case is held back until
      this one shows whether the BY values changed: that result
      becomes LAST of the buffered case and FIRST of the new one.

   4. Advance.  Every FILE tagged with min_sequence reads its next
      record. */
1132 /* Processes input files and write one case to the output file. */
1134 mtf_process_case (struct mtf_proc *mtf)
1137 struct mtf_file *min;
1138 struct mtf_file *file;
1142 /* Find the set of one or more FILEs whose BY values are
1143 minimal, as well as the set of zero or more TABLEs whose BY
1144 values equal those of the minimum FILEs.
1146 After each iteration of the loop, this invariant holds: the
1147 FILEs with minimum BY values thus far have "sequence"
1148 members equal to min_sequence, and "min" points to one of
1149 the mtf_files whose case has those minimum BY values, and
1150 similarly for TABLEs. */
1153 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1154 if (case_is_null (&file->input))
1155 file->sequence = -1;
1156 else if (file->type == MTF_FILE)
1158 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1160 file->sequence = cmp < 0 ? -1 : min_sequence;
1163 file->sequence = ++min_sequence;
1170 assert (min != NULL);
1173 cmp = mtf_compare_BY_values (mtf, min, file);
1175 while (cmp > 0 && mtf_read_record (mtf, file));
1176 file->sequence = cmp == 0 ? min_sequence : -1;
1179 /* Form the output case from the input cases. */
1180 case_create (&c, dict_get_next_value_idx (mtf->dict));
1181 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1183 struct variable *v = dict_get_var (mtf->dict, i);
1184 value_set_missing (case_data_rw (&c, v), var_get_width (v));
1186 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1188 bool include_file = file->sequence == min_sequence;
1190 for (i = 0; i < file->var_cnt; i++)
1192 const struct mtf_variable *mv = &file->vars[i];
1193 const union value *in = case_data (&file->input, mv->in_var);
1194 union value *out = case_data_rw (&c, mv->out_var);
1195 value_copy (out, in, var_get_width (mv->in_var));
1197 if (file->in_var != NULL)
1198 case_data_rw (&c, file->in_var)->f = include_file;
1201 /* Write the output case. */
1202 if (mtf->first == NULL && mtf->last == NULL)
1204 /* With no FIRST or LAST variables, it's trivial. */
1205 casewriter_write (mtf->output, &c);
1209 /* It's harder with LAST, because we can't know whether
1210 this case is the last in a group until we've prepared
1211 the *next* case also. Thus, we buffer the previous
1212 output case until the next one is ready.
1214 We also have to save a copy of one of the previous input
1215 cases, so that we can compare the BY variables. We
1216 can't compare the BY variables between the current
1217 output case and the saved one because the BY variables
1218 might not be in the output (the user is allowed to drop
1221 if (mtf->prev_BY != NULL)
1223 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1224 min->by, mtf->prev_BY,
1226 if (mtf->last != NULL)
1227 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1228 casewriter_write (mtf->output, &mtf->buffered_case);
1233 case_move (&mtf->buffered_case, &c);
1234 if (mtf->first != NULL)
1235 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1239 mtf->prev_BY = min->by;
1240 case_destroy (&mtf->prev_BY_case);
1241 case_clone (&mtf->prev_BY_case, &min->input);
1245 /* Read another record from each input file FILE with minimum
1247 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1248 if (file->type == MTF_FILE)
1250 if (file->sequence == min_sequence)
1251 mtf_read_record (mtf, file);
1257 /* Merge the dictionary for file F into master dictionary M. */
1259 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1261 struct dictionary *d = f->dict;
1262 const char *d_docs, *m_docs;
1265 if (dict_get_label (m) == NULL)
1266 dict_set_label (m, dict_get_label (d));
1268 d_docs = dict_get_documents (d);
1269 m_docs = dict_get_documents (m);
1273 dict_set_documents (m, d_docs);
1276 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1277 dict_set_documents (m, new_docs);
1282 for (i = 0; i < dict_get_var_cnt (d); i++)
1284 struct variable *dv = dict_get_var (d, i);
1285 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1287 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1292 if (var_get_width (mv) != var_get_width (dv))
1294 char *dv_description = var_type_description (dv);
1295 char *mv_description = var_type_description (mv);
1296 msg (SE, _("Variable %s in file %s (%s) has different "
1297 "type or width from the same variable in "
1298 "earlier file (%s)."),
1299 var_get_name (dv), fh_get_name (f->handle),
1300 dv_description, mv_description);
1301 free (dv_description);
1302 free (mv_description);
1306 if (var_get_width (dv) == var_get_width (mv))
1308 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1309 var_set_value_labels (mv, var_get_value_labels (dv));
1310 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1311 var_set_missing_values (mv, var_get_missing_values (dv));
1314 if (var_get_label (dv) && !var_get_label (mv))
1315 var_set_label (mv, var_get_label (dv));
1318 mv = dict_clone_var_assert (m, dv, var_get_name (dv));