1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/casereader.h>
25 #include <data/casewriter.h>
26 #include <data/format.h>
27 #include <data/dictionary.h>
28 #include <data/por-file-writer.h>
29 #include <data/procedure.h>
30 #include <data/settings.h>
31 #include <data/sys-file-writer.h>
32 #include <data/transformations.h>
33 #include <data/value-labels.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/data-io/file-handle.h>
37 #include <language/lexer/lexer.h>
38 #include <language/lexer/variable-parser.h>
39 #include <libpspp/alloc.h>
40 #include <libpspp/assertion.h>
41 #include <libpspp/compiler.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/message.h>
44 #include <libpspp/misc.h>
45 #include <libpspp/str.h>
46 #include <libpspp/taint.h>
49 #define _(msgid) gettext (msgid)
51 /* Rearranging and reducing a dictionary. */
52 static void start_case_map (struct dictionary *);
53 static struct case_map *finish_case_map (struct dictionary *);
54 static void map_case (const struct case_map *,
55 const struct ccase *, struct ccase *);
56 static void destroy_case_map (struct case_map *);
57 static size_t case_map_get_value_cnt (const struct case_map *);
59 static bool parse_dict_trim (struct lexer *, struct dictionary *);
61 /* Reading system and portable files. */
63 /* Type of command. */
70 static void get_translate_case (const struct ccase *, struct ccase *,
72 static bool get_destroy_case_map (void *map_);
74 /* Parses a GET or IMPORT command. */
76 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
78 struct casereader *reader = NULL;
79 struct file_handle *fh = NULL;
80 struct dictionary *dict = NULL;
81 struct case_map *map = NULL;
85 lex_match (lexer, '/');
87 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
89 lex_match (lexer, '=');
91 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
95 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
97 lex_match (lexer, '=');
99 if (lex_match_id (lexer, "COMM"))
101 else if (lex_match_id (lexer, "TAPE"))
105 lex_error (lexer, _("expecting COMM or TAPE"));
115 lex_sbc_missing (lexer, "FILE");
119 reader = any_reader_open (fh, &dict);
123 start_case_map (dict);
125 while (lex_token (lexer) != '.')
127 lex_match (lexer, '/');
128 if (!parse_dict_trim (lexer, dict))
132 map = finish_case_map (dict);
134 reader = casereader_create_translator (reader,
135 dict_get_next_value_idx (dict),
137 get_destroy_case_map,
140 proc_set_active_file (ds, reader, dict);
145 casereader_destroy (reader);
148 return CMD_CASCADING_FAILURE;
152 get_translate_case (const struct ccase *input, struct ccase *output,
155 struct case_map *map = map_;
156 map_case (map, input, output);
160 get_destroy_case_map (void *map_)
162 struct case_map *map = map_;
163 destroy_case_map (map);
169 cmd_get (struct lexer *lexer, struct dataset *ds)
171 return parse_read_command (lexer, ds, GET_CMD);
176 cmd_import (struct lexer *lexer, struct dataset *ds)
178 return parse_read_command (lexer, ds, IMPORT_CMD);
181 /* Writing system and portable files. */
183 /* Type of output file. */
186 SYSFILE_WRITER, /* System file. */
187 PORFILE_WRITER /* Portable file. */
190 /* Type of a command. */
193 XFORM_CMD, /* Transformation. */
194 PROC_CMD /* Procedure. */
197 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
198 WRITER_TYPE identifies the type of file to write,
199 and COMMAND_TYPE identifies the type of command.
201 On success, returns a writer.
202 For procedures only, sets *RETAIN_UNSELECTED to true if cases
203 that would otherwise be excluded by FILTER or USE should be included.
206 On failure, returns a null pointer. */
207 static struct casewriter *
208 parse_write_command (struct lexer *lexer, struct dataset *ds,
209 enum writer_type writer_type,
210 enum command_type command_type,
211 bool *retain_unselected)
214 struct file_handle *handle; /* Output file. */
215 struct dictionary *dict; /* Dictionary for output file. */
216 struct casewriter *writer; /* Writer. */
217 struct case_map *map; /* Map from input data to data for writer. */
219 /* Common options. */
220 bool print_map; /* Print map? TODO. */
221 bool print_short_names; /* Print long-to-short name map. TODO. */
222 struct sfm_write_options sysfile_opts;
223 struct pfm_write_options porfile_opts;
225 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
226 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
227 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
229 if (command_type == PROC_CMD)
230 *retain_unselected = true;
233 dict = dict_clone (dataset_dict (ds));
237 print_short_names = false;
238 sysfile_opts = sfm_writer_default_options ();
239 porfile_opts = pfm_writer_default_options ();
241 start_case_map (dict);
242 dict_delete_scratch_vars (dict);
244 lex_match (lexer, '/');
247 if (lex_match_id (lexer, "OUTFILE"))
251 lex_sbc_only_once ("OUTFILE");
255 lex_match (lexer, '=');
257 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
261 else if (lex_match_id (lexer, "NAMES"))
262 print_short_names = true;
263 else if (lex_match_id (lexer, "PERMISSIONS"))
267 lex_match (lexer, '=');
268 if (lex_match_id (lexer, "READONLY"))
270 else if (lex_match_id (lexer, "WRITEABLE"))
274 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
277 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
279 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
281 lex_match (lexer, '=');
282 if (lex_match_id (lexer, "RETAIN"))
283 *retain_unselected = true;
284 else if (lex_match_id (lexer, "DELETE"))
285 *retain_unselected = false;
288 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
292 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
293 sysfile_opts.compress = true;
294 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
295 sysfile_opts.compress = false;
296 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
298 lex_match (lexer, '=');
299 if (!lex_force_int (lexer))
301 sysfile_opts.version = lex_integer (lexer);
304 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
306 lex_match (lexer, '=');
307 if (lex_match_id (lexer, "COMMUNICATIONS"))
308 porfile_opts.type = PFM_COMM;
309 else if (lex_match_id (lexer, "TAPE"))
310 porfile_opts.type = PFM_TAPE;
313 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
317 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
319 lex_match (lexer, '=');
320 if (!lex_force_int (lexer))
322 porfile_opts.digits = lex_integer (lexer);
325 else if (!parse_dict_trim (lexer, dict))
328 if (!lex_match (lexer, '/'))
331 if (lex_end_of_command (lexer) != CMD_SUCCESS)
336 lex_sbc_missing (lexer, "OUTFILE");
340 dict_delete_scratch_vars (dict);
341 dict_compact_values (dict);
343 if (fh_get_referent (handle) == FH_REF_FILE)
348 writer = sfm_open_writer (handle, dict, sysfile_opts);
351 writer = pfm_open_writer (handle, dict, porfile_opts);
356 writer = any_writer_open (handle, dict);
360 map = finish_case_map (dict);
362 writer = casewriter_create_translator (writer,
363 case_map_get_value_cnt (map),
365 get_destroy_case_map,
372 casewriter_destroy (writer);
374 destroy_case_map (map);
378 /* SAVE and EXPORT. */
380 /* Parses and performs the SAVE or EXPORT procedure. */
382 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
384 bool retain_unselected;
385 struct variable *saved_filter_variable;
386 struct casewriter *output;
389 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
392 return CMD_CASCADING_FAILURE;
394 saved_filter_variable = dict_get_filter (dataset_dict (ds));
395 if (retain_unselected)
396 dict_set_filter (dataset_dict (ds), NULL);
398 casereader_transfer (proc_open (ds), output);
399 ok = casewriter_destroy (output);
400 ok = proc_commit (ds) && ok;
402 dict_set_filter (dataset_dict (ds), saved_filter_variable);
404 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
408 cmd_save (struct lexer *lexer, struct dataset *ds)
410 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
414 cmd_export (struct lexer *lexer, struct dataset *ds)
416 return parse_output_proc (lexer, ds, PORFILE_WRITER);
419 /* XSAVE and XEXPORT. */
421 /* Transformation. */
424 struct casewriter *writer; /* Writer. */
427 static trns_proc_func output_trns_proc;
428 static trns_free_func output_trns_free;
430 /* Parses the XSAVE or XEXPORT transformation command. */
432 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
434 struct output_trns *t = xmalloc (sizeof *t);
435 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
436 if (t->writer == NULL)
439 return CMD_CASCADING_FAILURE;
442 add_transformation (ds, output_trns_proc, output_trns_free, t);
446 /* Writes case C to the system or portable file specified on XSAVE or XEXPORT. */
448 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
450 struct output_trns *t = trns_;
452 case_clone (&tmp, c);
453 casewriter_write (t->writer, &tmp);
454 return TRNS_CONTINUE;
457 /* Frees an XSAVE or XEXPORT transformation.
458 Returns true if successful, false if an I/O error occurred. */
460 output_trns_free (void *trns_)
462 struct output_trns *t = trns_;
463 bool ok = casewriter_destroy (t->writer);
470 cmd_xsave (struct lexer *lexer, struct dataset *ds)
472 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
475 /* XEXPORT command. */
477 cmd_xexport (struct lexer *lexer, struct dataset *ds)
479 return parse_output_trns (lexer, ds, PORFILE_WRITER);
482 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
483 static bool drop_variables (struct lexer *, struct dictionary *dict);
484 static bool keep_variables (struct lexer *, struct dictionary *dict);
486 /* Commands that read and write system files share a great deal
487 of common syntactic structure for rearranging and dropping
488 variables. This function parses this syntax and modifies DICT
489 appropriately. Returns true on success, false on failure. */
491 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
493 if (lex_match_id (lexer, "MAP"))
498 else if (lex_match_id (lexer, "DROP"))
499 return drop_variables (lexer, dict);
500 else if (lex_match_id (lexer, "KEEP"))
501 return keep_variables (lexer, dict);
502 else if (lex_match_id (lexer, "RENAME"))
503 return rename_variables (lexer, dict);
506 lex_error (lexer, _("expecting a valid subcommand"));
511 /* Parses and performs the RENAME subcommand of GET and SAVE. */
513 rename_variables (struct lexer *lexer, struct dictionary *dict)
526 lex_match (lexer, '=');
527 if (lex_token (lexer) != '(')
531 v = parse_variable (lexer, dict);
534 if (!lex_force_match (lexer, '=')
535 || !lex_force_id (lexer))
537 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
539 msg (SE, _("Cannot rename %s as %s because there already exists "
540 "a variable named %s. To rename variables with "
541 "overlapping names, use a single RENAME subcommand "
542 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
543 "\"/RENAME (A B C=B C A)\"."),
544 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
548 dict_rename_var (dict, v, lex_tokid (lexer));
557 while (lex_match (lexer, '('))
561 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
563 if (!lex_match (lexer, '='))
565 msg (SE, _("`=' expected after variable list."));
568 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
572 msg (SE, _("Number of variables on left side of `=' (%d) does not "
573 "match number of variables on right side (%d), in "
574 "parenthesized group %d of RENAME subcommand."),
575 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
578 if (!lex_force_match (lexer, ')'))
583 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
585 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
591 for (i = 0; i < nn; i++)
599 /* Parses and performs the DROP subcommand of GET and SAVE.
600 Returns true if successful, false on failure.*/
602 drop_variables (struct lexer *lexer, struct dictionary *dict)
607 lex_match (lexer, '=');
608 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
610 dict_delete_vars (dict, v, nv);
613 if (dict_get_var_cnt (dict) == 0)
615 msg (SE, _("Cannot DROP all variables from dictionary."));
621 /* Parses and performs the KEEP subcommand of GET and SAVE.
622 Returns true if successful, false on failure.*/
624 keep_variables (struct lexer *lexer, struct dictionary *dict)
630 lex_match (lexer, '=');
631 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
634 /* Move the specified variables to the beginning. */
635 dict_reorder_vars (dict, v, nv);
637 /* Delete the remaining variables. */
638 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
639 for (i = nv; i < dict_get_var_cnt (dict); i++)
640 v[i - nv] = dict_get_var (dict, i);
641 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
652 MTF_FILE, /* Specified on FILE= subcommand. */
653 MTF_TABLE /* Specified on TABLE= subcommand. */
656 /* One of the FILEs or TABLEs on MATCH FILES. */
659 struct ll ll; /* In list of all files and tables. */
664 const struct variable **by; /* List of BY variables for this file. */
665 struct mtf_variable *vars; /* Variables to copy to output. */
666 size_t var_cnt; /* Number of other variables. */
668 struct file_handle *handle; /* Input file handle. */
669 struct dictionary *dict; /* Input file dictionary. */
670 struct casereader *reader; /* Input reader. */
671 struct ccase input; /* Input record (null at end of file). */
674 char *in_name; /* Variable name. */
675 struct variable *in_var; /* Variable (in master dictionary). */
680 struct variable *in_var;
681 struct variable *out_var;
684 /* MATCH FILES procedure. */
687 struct ll_list files; /* List of "struct mtf_file"s. */
688 int nonempty_files; /* FILEs that are not at end-of-file. */
690 bool ok; /* False if I/O error occurs. */
692 struct dictionary *dict; /* Dictionary of output file. */
693 struct casewriter *output; /* MATCH FILES output. */
695 size_t by_cnt; /* Number of variables on BY subcommand. */
698 /* Only if "first" or "last" is nonnull are the remaining members used. */
700 struct variable *first; /* Variable specified on FIRST (if any). */
701 struct variable *last; /* Variable specified on LAST (if any). */
702 struct ccase buffered_case; /* Case ready for output except that we don't
703 know the value for the LAST variable yet. */
704 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
705 const struct variable **prev_BY; /* Last set of BY variables. */
708 static void mtf_free (struct mtf_proc *);
710 static bool mtf_close_all_files (struct mtf_proc *);
711 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
712 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
714 static void mtf_process_case (struct mtf_proc *);
716 static bool create_flag_var (const char *subcommand_name, const char *var_name,
717 struct dictionary *, struct variable **);
718 static char *var_type_description (struct variable *);
720 /* Parse and execute the MATCH FILES command. */
722 cmd_match_files (struct lexer *lexer, struct dataset *ds)
725 struct ll *first_table;
726 struct mtf_file *file, *next;
729 struct casereader *active_file = NULL;
731 char first_name[LONG_NAME_LEN + 1] = "";
732 char last_name[LONG_NAME_LEN + 1] = "";
734 struct taint *taint = NULL;
738 ll_init (&mtf.files);
739 mtf.nonempty_files = 0;
740 first_table = ll_null (&mtf.files);
741 mtf.dict = dict_create ();
744 mtf.first = mtf.last = NULL;
745 case_nullify (&mtf.buffered_case);
746 case_nullify (&mtf.prev_BY_case);
749 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
751 lex_match (lexer, '/');
752 while (lex_token (lexer) == T_ID
753 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
754 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
756 struct mtf_file *file = xmalloc (sizeof *file);
761 file->in_name = NULL;
765 case_nullify (&file->input);
767 if (lex_match_id (lexer, "FILE"))
769 file->type = MTF_FILE;
770 ll_insert (first_table, &file->ll);
771 mtf.nonempty_files++;
773 else if (lex_match_id (lexer, "TABLE"))
775 file->type = MTF_TABLE;
776 ll_push_tail (&mtf.files, &file->ll);
777 if (first_table == ll_null (&mtf.files))
778 first_table = &file->ll;
782 lex_match (lexer, '=');
784 if (lex_match (lexer, '*'))
786 if (!proc_has_active_file (ds))
788 msg (SE, _("Cannot specify the active file since no active "
789 "file has been defined."));
793 if (proc_make_temporary_transformations_permanent (ds))
795 _("MATCH FILES may not be used after TEMPORARY when "
796 "the active file is an input source. "
797 "Temporary transformations will be made permanent."));
799 file->dict = dict_clone (dataset_dict (ds));
803 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
804 if (file->handle == NULL)
807 file->reader = any_reader_open (file->handle, &file->dict);
808 if (file->reader == NULL)
812 while (lex_match (lexer, '/'))
813 if (lex_match_id (lexer, "RENAME"))
815 if (!rename_variables (lexer, file->dict))
818 else if (lex_match_id (lexer, "IN"))
820 lex_match (lexer, '=');
821 if (lex_token (lexer) != T_ID)
823 lex_error (lexer, NULL);
827 if (file->in_name != NULL)
829 msg (SE, _("Multiple IN subcommands for a single FILE or "
833 file->in_name = xstrdup (lex_tokid (lexer));
838 mtf_merge_dictionary (mtf.dict, file);
841 while (lex_token (lexer) != '.')
843 if (lex_match (lexer, T_BY))
845 struct mtf_file *file;
846 struct variable **by;
851 lex_sbc_only_once ("BY");
855 lex_match (lexer, '=');
856 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
857 PV_NO_DUPLICATE | PV_NO_SCRATCH))
861 ll_for_each (file, struct mtf_file, ll, &mtf.files)
865 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
866 for (i = 0; i < mtf.by_cnt; i++)
868 const char *var_name = var_get_name (by[i]);
869 file->by[i] = dict_lookup_var (file->dict, var_name);
870 if (file->by[i] == NULL)
872 if (file->handle != NULL)
873 msg (SE, _("File %s lacks BY variable %s."),
874 fh_get_name (file->handle), var_name);
876 msg (SE, _("Active file lacks BY variable %s."),
887 else if (lex_match_id (lexer, "FIRST"))
889 if (first_name[0] != '\0')
891 lex_sbc_only_once ("FIRST");
895 lex_match (lexer, '=');
896 if (!lex_force_id (lexer))
898 strcpy (first_name, lex_tokid (lexer));
901 else if (lex_match_id (lexer, "LAST"))
903 if (last_name[0] != '\0')
905 lex_sbc_only_once ("LAST");
909 lex_match (lexer, '=');
910 if (!lex_force_id (lexer))
912 strcpy (last_name, lex_tokid (lexer));
915 else if (lex_match_id (lexer, "MAP"))
919 else if (lex_match_id (lexer, "DROP"))
921 if (!drop_variables (lexer, mtf.dict))
924 else if (lex_match_id (lexer, "KEEP"))
926 if (!keep_variables (lexer, mtf.dict))
931 lex_error (lexer, NULL);
935 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
937 lex_end_of_command (lexer);
944 if (first_table != ll_null (&mtf.files))
946 msg (SE, _("BY is required when TABLE is specified."));
951 msg (SE, _("BY is required when IN is specified."));
956 /* Set up mapping from each file's variables to the corresponding variables in the master dictionary. */
958 ll_for_each (file, struct mtf_file, ll, &mtf.files)
960 size_t in_var_cnt = dict_get_var_cnt (file->dict);
962 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
964 for (i = 0; i < in_var_cnt; i++)
966 struct variable *in_var = dict_get_var (file->dict, i);
967 struct variable *out_var = dict_lookup_var (mtf.dict,
968 var_get_name (in_var));
972 struct mtf_variable *mv = &file->vars[file->var_cnt++];
974 mv->out_var = out_var;
979 /* Add IN, FIRST, and LAST variables to master dictionary. */
980 ll_for_each (file, struct mtf_file, ll, &mtf.files)
981 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
983 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
984 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
987 dict_delete_scratch_vars (mtf.dict);
988 dict_compact_values (mtf.dict);
989 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
990 taint = taint_clone (casewriter_get_taint (mtf.output));
992 ll_for_each (file, struct mtf_file, ll, &mtf.files)
994 if (file->reader == NULL)
996 if (active_file == NULL)
998 proc_discard_output (ds);
999 file->reader = active_file = proc_open (ds);
1002 file->reader = casereader_clone (active_file);
1004 taint_propagate (casereader_get_taint (file->reader), taint);
1007 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1008 mtf_read_record (&mtf, file);
1009 while (mtf.nonempty_files > 0)
1010 mtf_process_case (&mtf);
1011 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1013 if (mtf.last != NULL)
1014 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1015 casewriter_write (mtf.output, &mtf.buffered_case);
1016 case_nullify (&mtf.buffered_case);
1018 mtf_close_all_files (&mtf);
1019 if (active_file != NULL)
1022 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1028 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1031 if (active_file != NULL)
1034 taint_destroy (taint);
1035 return CMD_CASCADING_FAILURE;
1038 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1039 attempts to create a variable named VAR_NAME, with format
1040 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1041 Returns true if successful, false if the variable name is a
1042 duplicate (in which case a message saying that the variable
1043 specified on the given SUBCOMMAND is a duplicate is emitted).
1044 Also returns true, without doing anything, if VAR_NAME is null or empty. */
1047 create_flag_var (const char *subcommand, const char *var_name,
1048 struct dictionary *dict, struct variable **var)
1050 if (var_name != NULL && var_name[0] != '\0')
1052 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1053 *var = dict_create_var (dict, var_name, 0);
/* The message names the variable first and the subcommand second, so
   the arguments must be passed in that same order. */
1056 msg (SE, _("Variable name %s specified on %s subcommand "
1057 "duplicates an existing variable name."),
1058 var_name, subcommand);
/* Give the new flag variable the F1.0 format for both print and write. */
1061 var_set_both_formats (*var, &format);
1068 /* Return a string in an allocated buffer describing V's variable type and, for a string variable, its width. */
1071 var_type_description (struct variable *v)
1073 if (var_is_numeric (v))
1074 return xstrdup ("numeric");
1076 return xasprintf ("string with width %d", var_get_width (v));
1079 /* Closes all the files in MTF and frees their associated data.
1080 Returns true if successful, false if an I/O error occurred on
1081 any of the files. */
1083 mtf_close_all_files (struct mtf_proc *mtf)
1085 struct mtf_file *file;
1088 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1090 casereader_destroy (file->reader);
1092 dict_destroy (file->dict);
1093 free (file->in_name);
1094 case_destroy (&file->input);
1102 /* Frees all the data for the MATCH FILES procedure. */
1104 mtf_free (struct mtf_proc *mtf)
1106 mtf_close_all_files (mtf);
1107 dict_destroy (mtf->dict);
1108 casewriter_destroy (mtf->output);
1109 case_destroy (&mtf->buffered_case);
1110 case_destroy (&mtf->prev_BY_case);
1113 /* Reads the next record into FILE, if possible.  If no record can be
1114 read and FILE is a FILE (not a TABLE), updates MTF's nonempty_files count. */
1116 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
/* Discard the previously read case before attempting to read a new one. */
1118 case_destroy (&file->input);
1119 if (!casereader_read (file->reader, &file->input))
/* nonempty_files is incremented only when a FILE subcommand is parsed,
   never for a TABLE, so only a FILE that runs out of records may
   decrement it.  Decrementing for a TABLE as well would let the
   "while (mtf.nonempty_files > 0)" loop in cmd_match_files stop while
   FILEs still have cases to output. */
1121 if (file->type == MTF_FILE) mtf->nonempty_files--;
1128 /* Compare the BY variables for files A and B; return -1 if A <
1129 B, 0 if A == B, 1 if A > B. (If there are no BY variables,
1130 then all records are equal.) */
1132 mtf_compare_BY_values (struct mtf_proc *mtf,
1133 struct mtf_file *a, struct mtf_file *b)
1135 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1138 /* Processes input files and write one case to the output file. */
1140 mtf_process_case (struct mtf_proc *mtf)
1143 struct mtf_file *min;
1144 struct mtf_file *file;
1148 /* Find the set of one or more FILEs whose BY values are
1149 minimal, as well as the set of zero or more TABLEs whose BY
1150 values equal those of the minimum FILEs.
1152 After each iteration of the loop, this invariant holds: the
1153 FILEs with minimum BY values thus far have "sequence"
1154 members equal to min_sequence, and "min" points to one of
1155 the mtf_files whose case has those minimum BY values, and
1156 similarly for TABLEs. */
1159 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1160 if (case_is_null (&file->input))
1161 file->sequence = -1;
1162 else if (file->type == MTF_FILE)
1164 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1166 file->sequence = cmp < 0 ? -1 : min_sequence;
1169 file->sequence = ++min_sequence;
1176 assert (min != NULL);
1179 cmp = mtf_compare_BY_values (mtf, min, file);
1181 while (cmp > 0 && mtf_read_record (mtf, file));
1182 file->sequence = cmp == 0 ? min_sequence : -1;
1185 /* Form the output case from the input cases. */
1186 case_create (&c, dict_get_next_value_idx (mtf->dict));
1187 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1189 struct variable *v = dict_get_var (mtf->dict, i);
1190 value_set_missing (case_data_rw (&c, v), var_get_width (v));
1192 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1194 bool include_file = file->sequence == min_sequence;
1196 for (i = 0; i < file->var_cnt; i++)
1198 const struct mtf_variable *mv = &file->vars[i];
1199 const union value *in = case_data (&file->input, mv->in_var);
1200 union value *out = case_data_rw (&c, mv->out_var);
1201 value_copy (out, in, var_get_width (mv->in_var));
1203 if (file->in_var != NULL)
1204 case_data_rw (&c, file->in_var)->f = include_file;
1207 /* Write the output case. */
1208 if (mtf->first == NULL && mtf->last == NULL)
1210 /* With no FIRST or LAST variables, it's trivial. */
1211 casewriter_write (mtf->output, &c);
1215 /* It's harder with LAST, because we can't know whether
1216 this case is the last in a group until we've prepared
1217 the *next* case also. Thus, we buffer the previous
1218 output case until the next one is ready.
1220 We also have to save a copy of one of the previous input
1221 cases, so that we can compare the BY variables. We
1222 can't compare the BY variables between the current
1223 output case and the saved one because the BY variables
1224 might not be in the output (the user is allowed to drop them). */
1227 if (mtf->prev_BY != NULL)
1229 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1230 min->by, mtf->prev_BY,
1232 if (mtf->last != NULL)
1233 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1234 casewriter_write (mtf->output, &mtf->buffered_case);
1239 case_move (&mtf->buffered_case, &c);
1240 if (mtf->first != NULL)
1241 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1245 mtf->prev_BY = min->by;
1246 case_destroy (&mtf->prev_BY_case);
1247 case_clone (&mtf->prev_BY_case, &min->input);
1251 /* Read another record from each input file FILE with minimum BY values. */
1253 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1254 if (file->type == MTF_FILE)
1256 if (file->sequence == min_sequence)
1257 mtf_read_record (mtf, file);
1263 /* Merge the dictionary for file F into master dictionary M.
Carries over the file label and documents from F's dictionary, then
visits each variable of F's dictionary: a variable that already exists
in M under the same name is checked for a matching width (an error is
reported on mismatch) and lends M its value labels, missing values and
variable label where M has none; dict_clone_var_assert() is used to
add a variable to M under its own name. */
1265 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1267 struct dictionary *d = f->dict;
1268 const char *d_docs, *m_docs;
/* M keeps the first file label it is given. */
1271 if (dict_get_label (m) == NULL)
1272 dict_set_label (m, dict_get_label (d));
/* Copy D's documents into M, either directly or appended to the
   documents M already has. */
1274 d_docs = dict_get_documents (d);
1275 m_docs = dict_get_documents (m);
1279 dict_set_documents (m, d_docs);
1282 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1283 dict_set_documents (m, new_docs);
1288 for (i = 0; i < dict_get_var_cnt (d); i++)
1290 struct variable *dv = dict_get_var (d, i);
1291 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1293 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1298 if (var_get_width (mv) != var_get_width (dv))
1300 char *dv_description = var_type_description (dv);
1301 char *mv_description = var_type_description (mv);
/* F->handle may be null: the BY-variable check in cmd_match_files
   treats a null handle as the active file.  Report such a file as
   "*" instead of passing a null handle to fh_get_name(). */
1302 msg (SE, _("Variable %s in file %s (%s) has different "
1303 "type or width from the same variable in "
1304 "earlier file (%s)."),
1305 var_get_name (dv), f->handle != NULL ? fh_get_name (f->handle) : "*",
1306 dv_description, mv_description);
/* The descriptions come from var_type_description(), which returns
   allocated strings. */
1307 free (dv_description);
1308 free (mv_description);
1312 if (var_get_width (dv) == var_get_width (mv))
/* Value labels and missing values are taken from D's variable only
   when M's variable has none of its own. */
1314 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1315 var_set_value_labels (mv, var_get_value_labels (dv));
1316 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1317 var_set_missing_values (mv, var_get_missing_values (dv));
/* Likewise, an existing variable label in M is never overwritten. */
1320 if (var_get_label (dv) && !var_get_label (mv))
1321 var_set_label (mv, var_get_label (dv));
1324 mv = dict_clone_var_assert (m, dv, var_get_name (dv));
1332 /* A case map copies data from a case that corresponds to one
1333 dictionary to a case that corresponds to a second dictionary
1334 derived from the first by, optionally, deleting, reordering,
1335 or renaming variables. (No new variables may be created.) */
1341 size_t value_cnt; /* Number of values in map. */
1342 int *map; /* For each destination index, the
1343 corresponding source index. */
1346 /* Prepares dictionary D for producing a case map. Afterward,
1347 the caller may delete, reorder, or rename variables within D
1348 at will before using finish_case_map() to produce the case map.
1351 Uses D's aux members, which must otherwise not be in use. */
1353 start_case_map (struct dictionary *d)
1355 size_t var_cnt = dict_get_var_cnt (d);
1358 for (i = 0; i < var_cnt; i++)
1360 struct variable *v = dict_get_var (d, i);
1361 int *src_fv = xmalloc (sizeof *src_fv);
1362 *src_fv = var_get_case_index (v);
1363 var_attach_aux (v, src_fv, var_dtor_free);
1367 /* Produces a case map from dictionary D, which must have been
1368 previously prepared with start_case_map().
1370 Does not retain any reference to D, and clears the aux members
1371 set up by start_case_map().
1373 Returns the new case map, or a null pointer if no mapping is
1374 required (that is, no data has changed position). */
1375 static struct case_map *
1376 finish_case_map (struct dictionary *d)
1378 struct case_map *map;
1379 size_t var_cnt = dict_get_var_cnt (d);
1383 map = xmalloc (sizeof *map);
1384 map->value_cnt = dict_get_next_value_idx (d);
1385 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1386 for (i = 0; i < map->value_cnt; i++)
1390 for (i = 0; i < var_cnt; i++)
1392 struct variable *v = dict_get_var (d, i);
1393 size_t value_cnt = var_get_value_cnt (v);
1394 int *src_fv = (int *) var_detach_aux (v);
1397 if (var_get_case_index (v) != *src_fv)
1400 for (idx = 0; idx < value_cnt; idx++)
1402 int src_idx = *src_fv + idx;
1403 int dst_idx = var_get_case_index (v) + idx;
1405 assert (map->map[dst_idx] == -1);
1406 map->map[dst_idx] = src_idx;
1413 destroy_case_map (map);
1417 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1423 /* Maps from SRC to DST, applying case map MAP. */
1425 map_case (const struct case_map *map,
1426 const struct ccase *src, struct ccase *dst)
1430 case_create (dst, map->value_cnt);
1431 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1433 int src_idx = map->map[dst_idx];
1435 *case_data_rw_idx (dst, dst_idx) = *case_data_idx (src, src_idx);
1439 /* Destroys case map MAP. */
1441 destroy_case_map (struct case_map *map)
1450 /* Returns the number of `union value's in cases created by MAP. */
1453 case_map_get_value_cnt (const struct case_map *map)
1455 return map->value_cnt;