1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/casereader.h>
25 #include <data/casewriter.h>
26 #include <data/format.h>
27 #include <data/dictionary.h>
28 #include <data/por-file-writer.h>
29 #include <data/procedure.h>
30 #include <data/settings.h>
31 #include <data/sys-file-writer.h>
32 #include <data/transformations.h>
33 #include <data/value-labels.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/data-io/file-handle.h>
37 #include <language/lexer/lexer.h>
38 #include <language/lexer/variable-parser.h>
39 #include <libpspp/alloc.h>
40 #include <libpspp/assertion.h>
41 #include <libpspp/compiler.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/message.h>
44 #include <libpspp/misc.h>
45 #include <libpspp/str.h>
46 #include <libpspp/taint.h>
49 #define _(msgid) gettext (msgid)
51 /* Rearranging and reducing a dictionary. */
52 static void start_case_map (struct dictionary *);
53 static struct case_map *finish_case_map (struct dictionary *);
54 static void map_case (const struct case_map *,
55 const struct ccase *, struct ccase *);
56 static void destroy_case_map (struct case_map *);
58 static bool parse_dict_trim (struct lexer *, struct dictionary *);
60 /* Reading system and portable files. */
62 /* Type of command. */
69 static void get_translate_case (const struct ccase *, struct ccase *,
71 static bool get_destroy_case_map (void *map_);
73 /* Parses a GET or IMPORT command. */
75 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
77 struct casereader *reader = NULL;
78 struct file_handle *fh = NULL;
79 struct dictionary *dict = NULL;
80 struct case_map *map = NULL;
84 lex_match (lexer, '/');
86 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
88 lex_match (lexer, '=');
90 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
94 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
96 lex_match (lexer, '=');
98 if (lex_match_id (lexer, "COMM"))
100 else if (lex_match_id (lexer, "TAPE"))
104 lex_error (lexer, _("expecting COMM or TAPE"));
114 lex_sbc_missing (lexer, "FILE");
118 reader = any_reader_open (fh, &dict);
122 start_case_map (dict);
124 while (lex_token (lexer) != '.')
126 lex_match (lexer, '/');
127 if (!parse_dict_trim (lexer, dict))
131 map = finish_case_map (dict);
133 reader = casereader_create_translator (reader,
134 dict_get_next_value_idx (dict),
136 get_destroy_case_map,
139 proc_set_active_file (ds, reader, dict);
144 casereader_destroy (reader);
147 return CMD_CASCADING_FAILURE;
151 get_translate_case (const struct ccase *input, struct ccase *output,
154 struct case_map *map = map_;
155 map_case (map, input, output);
159 get_destroy_case_map (void *map_)
161 struct case_map *map = map_;
162 destroy_case_map (map);
168 cmd_get (struct lexer *lexer, struct dataset *ds)
170 return parse_read_command (lexer, ds, GET_CMD);
175 cmd_import (struct lexer *lexer, struct dataset *ds)
177 return parse_read_command (lexer, ds, IMPORT_CMD);
180 /* Writing system and portable files. */
182 /* Type of output file. */
185 SYSFILE_WRITER, /* System file. */
186 PORFILE_WRITER /* Portable file. */
189 /* Type of a command. */
192 XFORM_CMD, /* Transformation. */
193 PROC_CMD /* Procedure. */
196 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
197 WRITER_TYPE identifies the type of file to write,
198 and COMMAND_TYPE identifies the type of command.
200 On success, returns a writer.
201 For procedures only, sets *RETAIN_UNSELECTED to true if cases
202 that would otherwise be excluded by FILTER or USE should be
205 On failure, returns a null pointer. */
206 static struct casewriter *
207 parse_write_command (struct lexer *lexer, struct dataset *ds,
208 enum writer_type writer_type,
209 enum command_type command_type,
210 bool *retain_unselected)
213 struct file_handle *handle; /* Output file. */
214 struct dictionary *dict; /* Dictionary for output file. */
215 struct casewriter *writer; /* Writer. */
216 struct case_map *map; /* Map from input data to data for writer. */
218 /* Common options. */
219 bool print_map; /* Print map? TODO. */
220 bool print_short_names; /* Print long-to-short name map. TODO. */
221 struct sfm_write_options sysfile_opts;
222 struct pfm_write_options porfile_opts;
224 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
225 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
226 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
228 if (command_type == PROC_CMD)
229 *retain_unselected = true;
232 dict = dict_clone (dataset_dict (ds));
236 print_short_names = false;
237 sysfile_opts = sfm_writer_default_options ();
238 porfile_opts = pfm_writer_default_options ();
240 start_case_map (dict);
241 dict_delete_scratch_vars (dict);
243 lex_match (lexer, '/');
246 if (lex_match_id (lexer, "OUTFILE"))
250 lex_sbc_only_once ("OUTFILE");
254 lex_match (lexer, '=');
256 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
260 else if (lex_match_id (lexer, "NAMES"))
261 print_short_names = true;
262 else if (lex_match_id (lexer, "PERMISSIONS"))
266 lex_match (lexer, '=');
267 if (lex_match_id (lexer, "READONLY"))
269 else if (lex_match_id (lexer, "WRITEABLE"))
273 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
276 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
278 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
280 lex_match (lexer, '=');
281 if (lex_match_id (lexer, "RETAIN"))
282 *retain_unselected = true;
283 else if (lex_match_id (lexer, "DELETE"))
284 *retain_unselected = false;
287 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
291 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
292 sysfile_opts.compress = true;
293 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
294 sysfile_opts.compress = false;
295 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
297 lex_match (lexer, '=');
298 if (!lex_force_int (lexer))
300 sysfile_opts.version = lex_integer (lexer);
303 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
305 lex_match (lexer, '=');
306 if (lex_match_id (lexer, "COMMUNICATIONS"))
307 porfile_opts.type = PFM_COMM;
308 else if (lex_match_id (lexer, "TAPE"))
309 porfile_opts.type = PFM_TAPE;
312 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
316 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
318 lex_match (lexer, '=');
319 if (!lex_force_int (lexer))
321 porfile_opts.digits = lex_integer (lexer);
324 else if (!parse_dict_trim (lexer, dict))
327 if (!lex_match (lexer, '/'))
330 if (lex_end_of_command (lexer) != CMD_SUCCESS)
335 lex_sbc_missing (lexer, "OUTFILE");
339 dict_compact_values (dict);
341 if (fh_get_referent (handle) == FH_REF_FILE)
346 writer = sfm_open_writer (handle, dict, sysfile_opts);
349 writer = pfm_open_writer (handle, dict, porfile_opts);
354 writer = any_writer_open (handle, dict);
358 map = finish_case_map (dict);
360 writer = casewriter_create_translator (writer,
362 get_destroy_case_map,
369 casewriter_destroy (writer);
371 destroy_case_map (map);
375 /* SAVE and EXPORT. */
377 /* Parses and performs the SAVE or EXPORT procedure. */
379 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
381 bool retain_unselected;
382 struct variable *saved_filter_variable;
383 struct casewriter *output;
386 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
389 return CMD_CASCADING_FAILURE;
391 saved_filter_variable = dict_get_filter (dataset_dict (ds));
392 if (retain_unselected)
393 dict_set_filter (dataset_dict (ds), NULL);
395 casereader_transfer (proc_open (ds), output);
396 ok = casewriter_destroy (output);
397 ok = proc_commit (ds) && ok;
399 dict_set_filter (dataset_dict (ds), saved_filter_variable);
401 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
405 cmd_save (struct lexer *lexer, struct dataset *ds)
407 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
411 cmd_export (struct lexer *lexer, struct dataset *ds)
413 return parse_output_proc (lexer, ds, PORFILE_WRITER);
416 /* XSAVE and XEXPORT. */
418 /* Transformation. */
421 struct casewriter *writer; /* Writer. */
424 static trns_proc_func output_trns_proc;
425 static trns_free_func output_trns_free;
427 /* Parses the XSAVE or XEXPORT transformation command. */
429 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
431 struct output_trns *t = xmalloc (sizeof *t);
432 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
433 if (t->writer == NULL)
436 return CMD_CASCADING_FAILURE;
439 add_transformation (ds, output_trns_proc, output_trns_free, t);
443 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
445 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
447 struct output_trns *t = trns_;
449 case_clone (&tmp, c);
450 casewriter_write (t->writer, &tmp);
451 return TRNS_CONTINUE;
454 /* Frees an XSAVE or XEXPORT transformation.
455 Returns true if successful, false if an I/O error occurred. */
457 output_trns_free (void *trns_)
459 struct output_trns *t = trns_;
460 bool ok = casewriter_destroy (t->writer);
467 cmd_xsave (struct lexer *lexer, struct dataset *ds)
469 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
472 /* XEXPORT command. */
474 cmd_xexport (struct lexer *lexer, struct dataset *ds)
476 return parse_output_trns (lexer, ds, PORFILE_WRITER);
479 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
480 static bool drop_variables (struct lexer *, struct dictionary *dict);
481 static bool keep_variables (struct lexer *, struct dictionary *dict);
483 /* Commands that read and write system files share a great deal
484 of common syntactic structure for rearranging and dropping
485 variables. This function parses this syntax and modifies DICT
486 appropriately. Returns true on success, false on failure. */
488 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
490 if (lex_match_id (lexer, "MAP"))
495 else if (lex_match_id (lexer, "DROP"))
496 return drop_variables (lexer, dict);
497 else if (lex_match_id (lexer, "KEEP"))
498 return keep_variables (lexer, dict);
499 else if (lex_match_id (lexer, "RENAME"))
500 return rename_variables (lexer, dict);
503 lex_error (lexer, _("expecting a valid subcommand"));
508 /* Parses and performs the RENAME subcommand of GET and SAVE. */
510 rename_variables (struct lexer *lexer, struct dictionary *dict)
523 lex_match (lexer, '=');
524 if (lex_token (lexer) != '(')
528 v = parse_variable (lexer, dict);
531 if (!lex_force_match (lexer, '=')
532 || !lex_force_id (lexer))
534 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
536 msg (SE, _("Cannot rename %s as %s because there already exists "
537 "a variable named %s. To rename variables with "
538 "overlapping names, use a single RENAME subcommand "
539 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
540 "\"/RENAME (A B C=B C A)\"."),
541 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
545 dict_rename_var (dict, v, lex_tokid (lexer));
554 while (lex_match (lexer, '('))
558 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
560 if (!lex_match (lexer, '='))
562 msg (SE, _("`=' expected after variable list."));
565 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
569 msg (SE, _("Number of variables on left side of `=' (%d) does not "
570 "match number of variables on right side (%d), in "
571 "parenthesized group %d of RENAME subcommand."),
572 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
575 if (!lex_force_match (lexer, ')'))
580 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
582 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
588 for (i = 0; i < nn; i++)
596 /* Parses and performs the DROP subcommand of GET and SAVE.
597 Returns true if successful, false on failure.*/
599 drop_variables (struct lexer *lexer, struct dictionary *dict)
604 lex_match (lexer, '=');
605 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
607 dict_delete_vars (dict, v, nv);
610 if (dict_get_var_cnt (dict) == 0)
612 msg (SE, _("Cannot DROP all variables from dictionary."));
618 /* Parses and performs the KEEP subcommand of GET and SAVE.
619 Returns true if successful, false on failure.*/
621 keep_variables (struct lexer *lexer, struct dictionary *dict)
627 lex_match (lexer, '=');
628 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
631 /* Move the specified variables to the beginning. */
632 dict_reorder_vars (dict, v, nv);
634 /* Delete the remaining variables. */
635 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
636 for (i = nv; i < dict_get_var_cnt (dict); i++)
637 v[i - nv] = dict_get_var (dict, i);
638 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
649 MTF_FILE, /* Specified on FILE= subcommand. */
650 MTF_TABLE /* Specified on TABLE= subcommand. */
653 /* One of the FILEs or TABLEs on MATCH FILES. */
656 struct ll ll; /* In list of all files and tables. */
661 const struct variable **by; /* List of BY variables for this file. */
662 struct mtf_variable *vars; /* Variables to copy to output. */
663 size_t var_cnt; /* Number of other variables. */
665 struct file_handle *handle; /* Input file handle. */
666 struct dictionary *dict; /* Input file dictionary. */
667 struct casereader *reader; /* Input reader. */
668 struct ccase input; /* Input record (null at end of file). */
671 char *in_name; /* Variable name. */
672 struct variable *in_var; /* Variable (in master dictionary). */
677 struct variable *in_var;
678 struct variable *out_var;
681 /* MATCH FILES procedure. */
684 struct ll_list files; /* List of "struct mtf_file"s. */
685 int nonempty_files; /* FILEs that are not at end-of-file. */
687 bool ok; /* False if I/O error occurs. */
689 struct dictionary *dict; /* Dictionary of output file. */
690 struct casewriter *output; /* MATCH FILES output. */
692 size_t by_cnt; /* Number of variables on BY subcommand. */
695 Only if "first" or "last" is nonnull are the remaining
697 struct variable *first; /* Variable specified on FIRST (if any). */
698 struct variable *last; /* Variable specified on LAST (if any). */
699 struct ccase buffered_case; /* Case ready for output except that we don't
700 know the value for the LAST variable yet. */
701 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
702 const struct variable **prev_BY; /* Last set of BY variables. */
705 static void mtf_free (struct mtf_proc *);
707 static bool mtf_close_all_files (struct mtf_proc *);
708 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
709 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
711 static void mtf_process_case (struct mtf_proc *);
713 static bool create_flag_var (const char *subcommand_name, const char *var_name,
714 struct dictionary *, struct variable **);
715 static char *var_type_description (struct variable *);
717 /* Parse and execute the MATCH FILES command. */
719 cmd_match_files (struct lexer *lexer, struct dataset *ds)
722 struct ll *first_table;
723 struct mtf_file *file, *next;
726 struct casereader *active_file = NULL;
728 char first_name[LONG_NAME_LEN + 1] = "";
729 char last_name[LONG_NAME_LEN + 1] = "";
731 struct taint *taint = NULL;
735 ll_init (&mtf.files);
736 mtf.nonempty_files = 0;
737 first_table = ll_null (&mtf.files);
738 mtf.dict = dict_create ();
741 mtf.first = mtf.last = NULL;
742 case_nullify (&mtf.buffered_case);
743 case_nullify (&mtf.prev_BY_case);
746 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
748 lex_match (lexer, '/');
749 while (lex_token (lexer) == T_ID
750 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
751 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
753 struct mtf_file *file = xmalloc (sizeof *file);
758 file->in_name = NULL;
762 case_nullify (&file->input);
764 if (lex_match_id (lexer, "FILE"))
766 file->type = MTF_FILE;
767 ll_insert (first_table, &file->ll);
768 mtf.nonempty_files++;
770 else if (lex_match_id (lexer, "TABLE"))
772 file->type = MTF_TABLE;
773 ll_push_tail (&mtf.files, &file->ll);
774 if (first_table == ll_null (&mtf.files))
775 first_table = &file->ll;
779 lex_match (lexer, '=');
781 if (lex_match (lexer, '*'))
783 if (!proc_has_active_file (ds))
785 msg (SE, _("Cannot specify the active file since no active "
786 "file has been defined."));
790 if (proc_make_temporary_transformations_permanent (ds))
792 _("MATCH FILES may not be used after TEMPORARY when "
793 "the active file is an input source. "
794 "Temporary transformations will be made permanent."));
796 file->dict = dict_clone (dataset_dict (ds));
800 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
801 if (file->handle == NULL)
804 file->reader = any_reader_open (file->handle, &file->dict);
805 if (file->reader == NULL)
809 while (lex_match (lexer, '/'))
810 if (lex_match_id (lexer, "RENAME"))
812 if (!rename_variables (lexer, file->dict))
815 else if (lex_match_id (lexer, "IN"))
817 lex_match (lexer, '=');
818 if (lex_token (lexer) != T_ID)
820 lex_error (lexer, NULL);
824 if (file->in_name != NULL)
826 msg (SE, _("Multiple IN subcommands for a single FILE or "
830 file->in_name = xstrdup (lex_tokid (lexer));
835 mtf_merge_dictionary (mtf.dict, file);
838 while (lex_token (lexer) != '.')
840 if (lex_match (lexer, T_BY))
842 struct mtf_file *file;
843 struct variable **by;
848 lex_sbc_only_once ("BY");
852 lex_match (lexer, '=');
853 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
854 PV_NO_DUPLICATE | PV_NO_SCRATCH))
858 ll_for_each (file, struct mtf_file, ll, &mtf.files)
862 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
863 for (i = 0; i < mtf.by_cnt; i++)
865 const char *var_name = var_get_name (by[i]);
866 file->by[i] = dict_lookup_var (file->dict, var_name);
867 if (file->by[i] == NULL)
869 if (file->handle != NULL)
870 msg (SE, _("File %s lacks BY variable %s."),
871 fh_get_name (file->handle), var_name);
873 msg (SE, _("Active file lacks BY variable %s."),
884 else if (lex_match_id (lexer, "FIRST"))
886 if (first_name[0] != '\0')
888 lex_sbc_only_once ("FIRST");
892 lex_match (lexer, '=');
893 if (!lex_force_id (lexer))
895 strcpy (first_name, lex_tokid (lexer));
898 else if (lex_match_id (lexer, "LAST"))
900 if (last_name[0] != '\0')
902 lex_sbc_only_once ("LAST");
906 lex_match (lexer, '=');
907 if (!lex_force_id (lexer))
909 strcpy (last_name, lex_tokid (lexer));
912 else if (lex_match_id (lexer, "MAP"))
916 else if (lex_match_id (lexer, "DROP"))
918 if (!drop_variables (lexer, mtf.dict))
921 else if (lex_match_id (lexer, "KEEP"))
923 if (!keep_variables (lexer, mtf.dict))
928 lex_error (lexer, NULL);
932 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
934 lex_end_of_command (lexer);
941 if (first_table != ll_null (&mtf.files))
943 msg (SE, _("BY is required when TABLE is specified."));
948 msg (SE, _("BY is required when IN is specified."));
953 /* Set up mapping from each file's variables to master
955 ll_for_each (file, struct mtf_file, ll, &mtf.files)
957 size_t in_var_cnt = dict_get_var_cnt (file->dict);
959 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
961 for (i = 0; i < in_var_cnt; i++)
963 struct variable *in_var = dict_get_var (file->dict, i);
964 struct variable *out_var = dict_lookup_var (mtf.dict,
965 var_get_name (in_var));
969 struct mtf_variable *mv = &file->vars[file->var_cnt++];
971 mv->out_var = out_var;
976 /* Add IN, FIRST, and LAST variables to master dictionary. */
977 ll_for_each (file, struct mtf_file, ll, &mtf.files)
978 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
980 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
981 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
984 dict_compact_values (mtf.dict);
985 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
986 taint = taint_clone (casewriter_get_taint (mtf.output));
988 ll_for_each (file, struct mtf_file, ll, &mtf.files)
990 if (file->reader == NULL)
992 if (active_file == NULL)
994 proc_discard_output (ds);
995 file->reader = active_file = proc_open (ds);
998 file->reader = casereader_clone (active_file);
1000 taint_propagate (casereader_get_taint (file->reader), taint);
1003 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1004 mtf_read_record (&mtf, file);
1005 while (mtf.nonempty_files > 0)
1006 mtf_process_case (&mtf);
1007 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1009 if (mtf.last != NULL)
1010 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1011 casewriter_write (mtf.output, &mtf.buffered_case);
1012 case_nullify (&mtf.buffered_case);
1014 mtf_close_all_files (&mtf);
1015 if (active_file != NULL)
1018 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1024 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1027 if (active_file != NULL)
1030 taint_destroy (taint);
1031 return CMD_CASCADING_FAILURE;
1034 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1035 attempts to create a variable named VAR_NAME, with format
1036 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1037 Returns true if successful, false if the variable name is a
1038 duplicate (in which case a message saying that the variable
1039 specified on the given SUBCOMMAND is a duplicate is emitted).
1040 Also returns true, without doing anything, if VAR_NAME is null
1043 create_flag_var (const char *subcommand, const char *var_name,
1044 struct dictionary *dict, struct variable **var)
1046 if (var_name != NULL && var_name[0] != '\0')
1048 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1049 *var = dict_create_var (dict, var_name, 0);
1052 msg (SE, _("Variable name %s specified on %s subcommand "
1053 "duplicates an existing variable name."),
1054 subcommand, var_name);
1057 var_set_both_formats (*var, &format);
1064 /* Return a string in an allocated buffer describing V's variable
1067 var_type_description (struct variable *v)
1069 if (var_is_numeric (v))
1070 return xstrdup ("numeric");
1072 return xasprintf ("string with width %d", var_get_width (v));
1075 /* Closes all the files in MTF and frees their associated data.
1076 Returns true if successful, false if an I/O error occurred on
1077 any of the files. */
1079 mtf_close_all_files (struct mtf_proc *mtf)
1081 struct mtf_file *file;
1084 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1086 casereader_destroy (file->reader);
1088 dict_destroy (file->dict);
1089 free (file->in_name);
1090 case_destroy (&file->input);
1098 /* Frees all the data for the MATCH FILES procedure. */
1100 mtf_free (struct mtf_proc *mtf)
1102 mtf_close_all_files (mtf);
1103 dict_destroy (mtf->dict);
1104 casewriter_destroy (mtf->output);
1105 case_destroy (&mtf->buffered_case);
1106 case_destroy (&mtf->prev_BY_case);
1109 /* Reads the next record into FILE, if possible, and update MTF's
1110 nonempty_files count if not. */
1112 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
1114 case_destroy (&file->input);
1115 if (!casereader_read (file->reader, &file->input))
1117 mtf->nonempty_files--;
1124 /* Compare the BY variables for files A and B; return -1 if A <
1125 B, 0 if A == B, 1 if A > B. (If there are no BY variables,
1126 then all records are equal.) */
1128 mtf_compare_BY_values (struct mtf_proc *mtf,
1129 struct mtf_file *a, struct mtf_file *b)
1131 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1134 /* Processes input files and write one case to the output file. */
1136 mtf_process_case (struct mtf_proc *mtf)
1139 struct mtf_file *min;
1140 struct mtf_file *file;
1144 /* Find the set of one or more FILEs whose BY values are
1145 minimal, as well as the set of zero or more TABLEs whose BY
1146 values equal those of the minimum FILEs.
1148 After each iteration of the loop, this invariant holds: the
1149 FILEs with minimum BY values thus far have "sequence"
1150 members equal to min_sequence, and "min" points to one of
1151 the mtf_files whose case has those minimum BY values, and
1152 similarly for TABLEs. */
1155 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1156 if (case_is_null (&file->input))
1157 file->sequence = -1;
1158 else if (file->type == MTF_FILE)
1160 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1162 file->sequence = cmp < 0 ? -1 : min_sequence;
1165 file->sequence = ++min_sequence;
1172 assert (min != NULL);
1175 cmp = mtf_compare_BY_values (mtf, min, file);
1177 while (cmp > 0 && mtf_read_record (mtf, file));
1178 file->sequence = cmp == 0 ? min_sequence : -1;
1181 /* Form the output case from the input cases. */
1182 case_create (&c, dict_get_next_value_idx (mtf->dict));
1183 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1185 struct variable *v = dict_get_var (mtf->dict, i);
1186 value_set_missing (case_data_rw (&c, v), var_get_width (v));
1188 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1190 bool include_file = file->sequence == min_sequence;
1192 for (i = 0; i < file->var_cnt; i++)
1194 const struct mtf_variable *mv = &file->vars[i];
1195 const union value *in = case_data (&file->input, mv->in_var);
1196 union value *out = case_data_rw (&c, mv->out_var);
1197 value_copy (out, in, var_get_width (mv->in_var));
1199 if (file->in_var != NULL)
1200 case_data_rw (&c, file->in_var)->f = include_file;
1203 /* Write the output case. */
1204 if (mtf->first == NULL && mtf->last == NULL)
1206 /* With no FIRST or LAST variables, it's trivial. */
1207 casewriter_write (mtf->output, &c);
1211 /* It's harder with LAST, because we can't know whether
1212 this case is the last in a group until we've prepared
1213 the *next* case also. Thus, we buffer the previous
1214 output case until the next one is ready.
1216 We also have to save a copy of one of the previous input
1217 cases, so that we can compare the BY variables. We
1218 can't compare the BY variables between the current
1219 output case and the saved one because the BY variables
1220 might not be in the output (the user is allowed to drop
1223 if (mtf->prev_BY != NULL)
1225 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1226 min->by, mtf->prev_BY,
1228 if (mtf->last != NULL)
1229 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1230 casewriter_write (mtf->output, &mtf->buffered_case);
1235 case_move (&mtf->buffered_case, &c);
1236 if (mtf->first != NULL)
1237 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1241 mtf->prev_BY = min->by;
1242 case_destroy (&mtf->prev_BY_case);
1243 case_clone (&mtf->prev_BY_case, &min->input);
1247 /* Read another record from each input file FILE with minimum
1249 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1250 if (file->type == MTF_FILE)
1252 if (file->sequence == min_sequence)
1253 mtf_read_record (mtf, file);
1259 /* Merge the dictionary for file F into master dictionary M. */
1261 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1263 struct dictionary *d = f->dict;
1264 const char *d_docs, *m_docs;
1267 if (dict_get_label (m) == NULL)
1268 dict_set_label (m, dict_get_label (d));
1270 d_docs = dict_get_documents (d);
1271 m_docs = dict_get_documents (m);
1275 dict_set_documents (m, d_docs);
1278 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1279 dict_set_documents (m, new_docs);
1284 for (i = 0; i < dict_get_var_cnt (d); i++)
1286 struct variable *dv = dict_get_var (d, i);
1287 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1289 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1294 if (var_get_width (mv) != var_get_width (dv))
1296 char *dv_description = var_type_description (dv);
1297 char *mv_description = var_type_description (mv);
1298 msg (SE, _("Variable %s in file %s (%s) has different "
1299 "type or width from the same variable in "
1300 "earlier file (%s)."),
1301 var_get_name (dv), fh_get_name (f->handle),
1302 dv_description, mv_description);
1303 free (dv_description);
1304 free (mv_description);
1308 if (var_get_width (dv) == var_get_width (mv))
1310 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1311 var_set_value_labels (mv, var_get_value_labels (dv));
1312 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1313 var_set_missing_values (mv, var_get_missing_values (dv));
1316 if (var_get_label (dv) && !var_get_label (mv))
1317 var_set_label (mv, var_get_label (dv));
1320 mv = dict_clone_var_assert (m, dv, var_get_name (dv));
1328 A case map copies data from a case that corresponds for one
1329 dictionary to a case that corresponds to a second dictionary
1330 derived from the first by, optionally, deleting, reordering,
1331 or renaming variables. (No new variables may be created.)
1337 size_t value_cnt; /* Number of values in map. */
1338 int *map; /* For each destination index, the
1339 corresponding source index. */
1342 /* Prepares dictionary D for producing a case map. Afterward,
1343 the caller may delete, reorder, or rename variables within D
1344 at will before using finish_case_map() to produce the case
1347 Uses D's aux members, which must otherwise not be in use. */
1349 start_case_map (struct dictionary *d)
1351 size_t var_cnt = dict_get_var_cnt (d);
1354 for (i = 0; i < var_cnt; i++)
1356 struct variable *v = dict_get_var (d, i);
1357 int *src_fv = xmalloc (sizeof *src_fv);
1358 *src_fv = var_get_case_index (v);
1359 var_attach_aux (v, src_fv, var_dtor_free);
1363 /* Produces a case map from dictionary D, which must have been
1364 previously prepared with start_case_map().
1366 Does not retain any reference to D, and clears the aux members
1367 set up by start_case_map().
1369 Returns the new case map, or a null pointer if no mapping is
1370 required (that is, no data has changed position). */
1371 static struct case_map *
1372 finish_case_map (struct dictionary *d)
1374 struct case_map *map;
1375 size_t var_cnt = dict_get_var_cnt (d);
1379 map = xmalloc (sizeof *map);
1380 map->value_cnt = dict_get_next_value_idx (d);
1381 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1382 for (i = 0; i < map->value_cnt; i++)
1386 for (i = 0; i < var_cnt; i++)
1388 struct variable *v = dict_get_var (d, i);
1389 size_t value_cnt = var_get_value_cnt (v);
1390 int *src_fv = (int *) var_detach_aux (v);
1393 if (var_get_case_index (v) != *src_fv)
1396 for (idx = 0; idx < value_cnt; idx++)
1398 int src_idx = *src_fv + idx;
1399 int dst_idx = var_get_case_index (v) + idx;
1401 assert (map->map[dst_idx] == -1);
1402 map->map[dst_idx] = src_idx;
1409 destroy_case_map (map);
1413 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1419 /* Maps from SRC to DST, applying case map MAP. */
1421 map_case (const struct case_map *map,
1422 const struct ccase *src, struct ccase *dst)
1426 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1428 int src_idx = map->map[dst_idx];
1430 *case_data_rw_idx (dst, dst_idx) = *case_data_idx (src, src_idx);
1434 /* Destroys case map MAP. */
1436 destroy_case_map (struct case_map *map)