1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2007 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #include <data/any-reader.h>
22 #include <data/any-writer.h>
23 #include <data/case.h>
24 #include <data/casereader.h>
25 #include <data/casewriter.h>
26 #include <data/format.h>
27 #include <data/dictionary.h>
28 #include <data/por-file-writer.h>
29 #include <data/procedure.h>
30 #include <data/settings.h>
31 #include <data/sys-file-writer.h>
32 #include <data/transformations.h>
33 #include <data/value-labels.h>
34 #include <data/variable.h>
35 #include <language/command.h>
36 #include <language/data-io/file-handle.h>
37 #include <language/lexer/lexer.h>
38 #include <language/lexer/variable-parser.h>
39 #include <libpspp/alloc.h>
40 #include <libpspp/assertion.h>
41 #include <libpspp/compiler.h>
42 #include <libpspp/hash.h>
43 #include <libpspp/message.h>
44 #include <libpspp/misc.h>
45 #include <libpspp/str.h>
46 #include <libpspp/taint.h>
49 #define _(msgid) gettext (msgid)
51 /* Rearranging and reducing a dictionary. */
/* Forward declarations of the case-map helpers and of the parser for the
   dictionary-trimming subcommands (MAP, DROP, KEEP, RENAME); all of them
   are defined later in this file. */
52 static void start_case_map (struct dictionary *);
53 static struct case_map *finish_case_map (struct dictionary *);
54 static void map_case (const struct case_map *,
55 const struct ccase *, struct ccase *);
56 static void destroy_case_map (struct case_map *);
57 static size_t case_map_get_value_cnt (const struct case_map *);
59 static bool parse_dict_trim (struct lexer *, struct dictionary *);
61 /* Reading system and portable files. */
63 /* Type of command. */
/* Case-translation callbacks.  get_destroy_case_map is the destructor
   passed to the casereader and casewriter translators created below. */
70 static void get_translate_case (const struct ccase *, struct ccase *,
72 static bool get_destroy_case_map (void *map_);
74 /* Parses a GET or IMPORT command. */
/* TYPE selects which command is being parsed; the TYPE subcommand is
   accepted only when TYPE is IMPORT_CMD.  The file named on FILE is
   opened with any_reader_open(), which also supplies its dictionary.
   Every remaining subcommand, up to the terminating period, is handed
   to parse_dict_trim().  The case map produced by finish_case_map() is
   used to wrap the reader with casereader_create_translator(), and the
   reader and trimmed dictionary are installed as the active file of DS.
   The visible failure path destroys the reader and returns
   CMD_CASCADING_FAILURE. */
76 parse_read_command (struct lexer *lexer, struct dataset *ds, enum reader_command type)
78 struct casereader *reader = NULL;
79 struct file_handle *fh = NULL;
80 struct dictionary *dict = NULL;
81 struct case_map *map = NULL;
85 lex_match (lexer, '/');
87 if (lex_match_id (lexer, "FILE") || lex_token (lexer) == T_STRING)
89 lex_match (lexer, '=');
91 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
95 else if (type == IMPORT_CMD && lex_match_id (lexer, "TYPE"))
97 lex_match (lexer, '=');
99 if (lex_match_id (lexer, "COMM"))
101 else if (lex_match_id (lexer, "TAPE"))
105 lex_error (lexer, _("expecting COMM or TAPE"));
115 lex_sbc_missing (lexer, "FILE");
119 reader = any_reader_open (fh, &dict);
/* Remember where each variable's data lives before trimming starts, so
   that finish_case_map() can describe how cases must be rearranged. */
123 start_case_map (dict);
125 while (lex_token (lexer) != '.')
127 lex_match (lexer, '/');
128 if (!parse_dict_trim (lexer, dict))
132 map = finish_case_map (dict);
134 reader = casereader_create_translator (reader,
135 dict_get_next_value_idx (dict),
137 get_destroy_case_map,
140 proc_set_active_file (ds, reader, dict);
145 casereader_destroy (reader);
148 return CMD_CASCADING_FAILURE;
/* Translation callback: treats MAP_ as a struct case_map and applies it
   to INPUT with map_case(), which creates OUTPUT. */
152 get_translate_case (const struct ccase *input, struct ccase *output,
155 struct case_map *map = map_;
156 map_case (map, input, output);
/* Destructor callback: treats MAP_ as a struct case_map and releases it
   with destroy_case_map().  Declared above as returning bool. */
160 get_destroy_case_map (void *map_)
162 struct case_map *map = map_;
163 destroy_case_map (map);
/* GET command: delegates to parse_read_command() with GET_CMD. */
169 cmd_get (struct lexer *lexer, struct dataset *ds)
171 return parse_read_command (lexer, ds, GET_CMD);
/* IMPORT command: delegates to parse_read_command() with IMPORT_CMD,
   which additionally enables the TYPE subcommand. */
176 cmd_import (struct lexer *lexer, struct dataset *ds)
178 return parse_read_command (lexer, ds, IMPORT_CMD);
181 /* Writing system and portable files. */
/* The enumerators below are the values of the writer_type and
   command_type arguments taken by parse_write_command(). */
183 /* Type of output file. */
186 SYSFILE_WRITER, /* System file. */
187 PORFILE_WRITER /* Portable file. */
190 /* Type of a command. */
193 XFORM_CMD, /* Transformation. */
194 PROC_CMD /* Procedure. */
197 /* Parses SAVE or XSAVE or EXPORT or XEXPORT command.
198 WRITER_TYPE identifies the type of file to write,
199 and COMMAND_TYPE identifies the type of command.
201 On success, returns a writer.
202 For procedures only, sets *RETAIN_UNSELECTED to true if cases
203 that would otherwise be excluded by FILTER or USE should be
206 On failure, returns a null pointer. */
/* Notes on the visible code: *RETAIN_UNSELECTED defaults to true and is
   changed only by the UNSELECTED subcommand, which is accepted only for
   PROC_CMD.  RETAIN_UNSELECTED must be null exactly when COMMAND_TYPE is
   XFORM_CMD (asserted below).  COMPRESSED, UNCOMPRESSED and VERSION are
   accepted only for SYSFILE_WRITER; TYPE and DIGITS only for
   PORFILE_WRITER.  Any other subcommand is passed to
   parse_dict_trim(). */
207 static struct casewriter *
208 parse_write_command (struct lexer *lexer, struct dataset *ds,
209 enum writer_type writer_type,
210 enum command_type command_type,
211 bool *retain_unselected)
214 struct file_handle *handle; /* Output file. */
215 struct dictionary *dict; /* Dictionary for output file. */
216 struct casewriter *writer; /* Writer. */
217 struct case_map *map; /* Map from input data to data for writer. */
219 /* Common options. */
220 bool print_map; /* Print map? TODO. */
221 bool print_short_names; /* Print long-to-short name map. TODO. */
222 struct sfm_write_options sysfile_opts;
223 struct pfm_write_options porfile_opts;
225 assert (writer_type == SYSFILE_WRITER || writer_type == PORFILE_WRITER);
226 assert (command_type == XFORM_CMD || command_type == PROC_CMD);
227 assert ((retain_unselected != NULL) == (command_type == PROC_CMD));
229 if (command_type == PROC_CMD)
230 *retain_unselected = true;
/* Trim a clone of the dataset's dictionary rather than the dictionary
   itself. */
233 dict = dict_clone (dataset_dict (ds));
237 print_short_names = false;
238 sysfile_opts = sfm_writer_default_options ();
239 porfile_opts = pfm_writer_default_options ();
/* Record the original data positions first, then drop scratch variables
   from the output dictionary. */
241 start_case_map (dict);
242 dict_delete_scratch_vars (dict);
244 lex_match (lexer, '/');
/* Subcommands. */
247 if (lex_match_id (lexer, "OUTFILE"))
251 lex_sbc_only_once ("OUTFILE");
255 lex_match (lexer, '=');
257 handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
261 else if (lex_match_id (lexer, "NAMES"))
262 print_short_names = true;
263 else if (lex_match_id (lexer, "PERMISSIONS"))
267 lex_match (lexer, '=');
268 if (lex_match_id (lexer, "READONLY"))
270 else if (lex_match_id (lexer, "WRITEABLE"))
274 lex_error (lexer, _("expecting %s or %s"), "READONLY", "WRITEABLE");
277 sysfile_opts.create_writeable = porfile_opts.create_writeable = cw;
279 else if (command_type == PROC_CMD && lex_match_id (lexer, "UNSELECTED"))
281 lex_match (lexer, '=');
282 if (lex_match_id (lexer, "RETAIN"))
283 *retain_unselected = true;
284 else if (lex_match_id (lexer, "DELETE"))
285 *retain_unselected = false;
288 lex_error (lexer, _("expecting %s or %s"), "RETAIN", "DELETE");
292 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "COMPRESSED"))
293 sysfile_opts.compress = true;
294 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "UNCOMPRESSED"))
295 sysfile_opts.compress = false;
296 else if (writer_type == SYSFILE_WRITER && lex_match_id (lexer, "VERSION"))
298 lex_match (lexer, '=');
299 if (!lex_force_int (lexer))
301 sysfile_opts.version = lex_integer (lexer);
304 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "TYPE"))
306 lex_match (lexer, '=');
/* NOTE(review): the keyword matched is COMMUNICATIONS, while the error
   message below names it COMM. */
307 if (lex_match_id (lexer, "COMMUNICATIONS"))
308 porfile_opts.type = PFM_COMM;
309 else if (lex_match_id (lexer, "TAPE"))
310 porfile_opts.type = PFM_TAPE;
313 lex_error (lexer, _("expecting %s or %s"), "COMM", "TAPE");
317 else if (writer_type == PORFILE_WRITER && lex_match_id (lexer, "DIGITS"))
319 lex_match (lexer, '=');
320 if (!lex_force_int (lexer))
322 porfile_opts.digits = lex_integer (lexer);
325 else if (!parse_dict_trim (lexer, dict))
328 if (!lex_match (lexer, '/'))
331 if (lex_end_of_command (lexer) != CMD_SUCCESS)
336 lex_sbc_missing (lexer, "OUTFILE");
340 dict_compact_values (dict);
/* Handles that refer to files use the system-file or portable-file
   writer; any_writer_open() is used otherwise (presumably for scratch
   handles -- the selecting branches are not visible here). */
342 if (fh_get_referent (handle) == FH_REF_FILE)
347 writer = sfm_open_writer (handle, dict, sysfile_opts);
350 writer = pfm_open_writer (handle, dict, porfile_opts);
355 writer = any_writer_open (handle, dict);
/* finish_case_map() returns null when no data has changed position;
   otherwise the writer is wrapped in a translator that applies MAP. */
359 map = finish_case_map (dict);
361 writer = casewriter_create_translator (writer,
362 case_map_get_value_cnt (map),
364 get_destroy_case_map,
371 casewriter_destroy (writer);
373 destroy_case_map (map);
377 /* SAVE and EXPORT. */
379 /* Parses and performs the SAVE or EXPORT procedure. */
/* Returns CMD_CASCADING_FAILURE if parse_write_command() fails.
   Otherwise, when unselected cases are to be retained, the dictionary's
   filter variable is cleared for the duration of the pass and restored
   afterward.  All cases from proc_open() are transferred to the writer.
   Success requires both casewriter_destroy() and proc_commit() to
   succeed. */
381 parse_output_proc (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
383 bool retain_unselected;
384 struct variable *saved_filter_variable;
385 struct casewriter *output;
388 output = parse_write_command (lexer, ds, writer_type, PROC_CMD,
391 return CMD_CASCADING_FAILURE;
393 saved_filter_variable = dict_get_filter (dataset_dict (ds));
394 if (retain_unselected)
395 dict_set_filter (dataset_dict (ds), NULL);
397 casereader_transfer (proc_open (ds), output);
398 ok = casewriter_destroy (output);
/* proc_commit() is on the left of && so that it runs even when
   destroying the writer has already failed. */
399 ok = proc_commit (ds) && ok;
401 dict_set_filter (dataset_dict (ds), saved_filter_variable);
403 return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
/* SAVE command: runs parse_output_proc() with the system-file writer. */
407 cmd_save (struct lexer *lexer, struct dataset *ds)
409 return parse_output_proc (lexer, ds, SYSFILE_WRITER);
/* EXPORT command: runs parse_output_proc() with the portable-file
   writer. */
413 cmd_export (struct lexer *lexer, struct dataset *ds)
415 return parse_output_proc (lexer, ds, PORFILE_WRITER);
418 /* XSAVE and XEXPORT. */
/* State of one XSAVE or XEXPORT transformation (struct output_trns, as
   allocated by parse_output_trns()), followed by the prototypes of the
   callbacks registered with add_transformation(). */
420 /* Transformation. */
423 struct casewriter *writer; /* Writer. */
426 static trns_proc_func output_trns_proc;
427 static trns_free_func output_trns_free;
429 /* Parses the XSAVE or XEXPORT transformation command. */
/* Allocates a struct output_trns, obtains its writer from
   parse_write_command() with XFORM_CMD (so no RETAIN_UNSELECTED pointer
   is passed), and on success registers output_trns_proc and
   output_trns_free as a transformation on DS.  Returns
   CMD_CASCADING_FAILURE when no writer could be created. */
431 parse_output_trns (struct lexer *lexer, struct dataset *ds, enum writer_type writer_type)
433 struct output_trns *t = xmalloc (sizeof *t);
434 t->writer = parse_write_command (lexer, ds, writer_type, XFORM_CMD, NULL);
435 if (t->writer == NULL)
438 return CMD_CASCADING_FAILURE;
441 add_transformation (ds, output_trns_proc, output_trns_free, t);
445 /* Writes case C to the system file specified on XSAVE or XEXPORT. */
/* TRNS_ is the struct output_trns registered by parse_output_trns().  A
   clone of C, not C itself, is handed to casewriter_write() (presumably
   because the writer takes ownership of the case it is given -- TODO
   confirm).  Always returns TRNS_CONTINUE. */
447 output_trns_proc (void *trns_, struct ccase *c, casenumber case_num UNUSED)
449 struct output_trns *t = trns_;
451 case_clone (&tmp, c);
452 casewriter_write (t->writer, &tmp);
453 return TRNS_CONTINUE;
456 /* Frees an XSAVE or XEXPORT transformation.
457 Returns true if successful, false if an I/O error occurred. */
/* The success flag is the result of casewriter_destroy() on the
   transformation's writer. */
459 output_trns_free (void *trns_)
461 struct output_trns *t = trns_;
462 bool ok = casewriter_destroy (t->writer);
/* XSAVE command: runs parse_output_trns() with the system-file writer. */
469 cmd_xsave (struct lexer *lexer, struct dataset *ds)
471 return parse_output_trns (lexer, ds, SYSFILE_WRITER);
474 /* XEXPORT command. */
/* Runs parse_output_trns() with the portable-file writer. */
476 cmd_xexport (struct lexer *lexer, struct dataset *ds)
478 return parse_output_trns (lexer, ds, PORFILE_WRITER);
/* Parsers for the individual dictionary-trimming subcommands; they are
   called from parse_dict_trim() and from cmd_match_files(). */
481 static bool rename_variables (struct lexer *lexer, struct dictionary *dict);
482 static bool drop_variables (struct lexer *, struct dictionary *dict);
483 static bool keep_variables (struct lexer *, struct dictionary *dict);
485 /* Commands that read and write system files share a great deal
486 of common syntactic structure for rearranging and dropping
487 variables. This function parses this syntax and modifies DICT
488 appropriately. Returns true on success, false on failure. */
/* Recognized keywords are MAP, DROP, KEEP and RENAME.  DROP, KEEP and
   RENAME are forwarded to their dedicated parsers; anything else is
   reported with lex_error(). */
490 parse_dict_trim (struct lexer *lexer, struct dictionary *dict)
492 if (lex_match_id (lexer, "MAP"))
497 else if (lex_match_id (lexer, "DROP"))
498 return drop_variables (lexer, dict);
499 else if (lex_match_id (lexer, "KEEP"))
500 return keep_variables (lexer, dict);
501 else if (lex_match_id (lexer, "RENAME"))
502 return rename_variables (lexer, dict);
505 lex_error (lexer, _("expecting a valid subcommand"));
510 /* Parses and performs the RENAME subcommand of GET and SAVE. */
/* Two syntaxes are handled.  When the next token is not a left
   parenthesis, a single OLD=NEW pair is parsed and applied with
   dict_rename_var(), after checking that NEW does not already exist in
   DICT.  Otherwise one or more parenthesized groups are parsed: old
   variables accumulate in V and new names in NEW_NAMES (both parsers are
   called with PV_APPEND), the counts of each group are compared, and
   dict_rename_vars() is applied to the accumulated lists.
   NOTE(review): the count-mismatch message uses %d conversions with
   arguments cast to unsigned. */
512 rename_variables (struct lexer *lexer, struct dictionary *dict)
525 lex_match (lexer, '=');
526 if (lex_token (lexer) != '(')
530 v = parse_variable (lexer, dict);
533 if (!lex_force_match (lexer, '=')
534 || !lex_force_id (lexer))
536 if (dict_lookup_var (dict, lex_tokid (lexer)) != NULL)
538 msg (SE, _("Cannot rename %s as %s because there already exists "
539 "a variable named %s. To rename variables with "
540 "overlapping names, use a single RENAME subcommand "
541 "such as \"/RENAME (A=B)(B=C)(C=A)\", or equivalently, "
542 "\"/RENAME (A B C=B C A)\"."),
543 var_get_name (v), lex_tokid (lexer), lex_tokid (lexer));
547 dict_rename_var (dict, v, lex_tokid (lexer));
556 while (lex_match (lexer, '('))
560 if (!parse_variables (lexer, dict, &v, &nv, PV_NO_DUPLICATE | PV_APPEND))
562 if (!lex_match (lexer, '='))
564 msg (SE, _("`=' expected after variable list."));
567 if (!parse_DATA_LIST_vars (lexer, &new_names, &nn, PV_APPEND | PV_NO_SCRATCH))
571 msg (SE, _("Number of variables on left side of `=' (%d) does not "
572 "match number of variables on right side (%d), in "
573 "parenthesized group %d of RENAME subcommand."),
574 (unsigned) (nv - old_nv), (unsigned) (nn - old_nv), group);
577 if (!lex_force_match (lexer, ')'))
582 if (!dict_rename_vars (dict, v, new_names, nv, &err_name))
584 msg (SE, _("Requested renaming duplicates variable name %s."), err_name);
590 for (i = 0; i < nn; i++)
598 /* Parses and performs the DROP subcommand of GET and SAVE.
599 Returns true if successful, false on failure.*/
/* The listed variables are deleted from DICT with dict_delete_vars().
   Afterward an error is reported if DICT has no variables left. */
601 drop_variables (struct lexer *lexer, struct dictionary *dict)
606 lex_match (lexer, '=');
607 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
609 dict_delete_vars (dict, v, nv);
612 if (dict_get_var_cnt (dict) == 0)
614 msg (SE, _("Cannot DROP all variables from dictionary."));
620 /* Parses and performs the KEEP subcommand of GET and SAVE.
621 Returns true if successful, false on failure.*/
/* After the NV listed variables are moved to the front of DICT, the
   array V is reused: it is resized to hold every variable from index NV
   onward, and those variables are then deleted. */
623 keep_variables (struct lexer *lexer, struct dictionary *dict)
629 lex_match (lexer, '=');
630 if (!parse_variables (lexer, dict, &v, &nv, PV_NONE))
633 /* Move the specified variables to the beginning. */
634 dict_reorder_vars (dict, v, nv);
636 /* Delete the remaining variables. */
637 v = xnrealloc (v, dict_get_var_cnt (dict) - nv, sizeof *v);
638 for (i = nv; i < dict_get_var_cnt (dict); i++)
639 v[i - nv] = dict_get_var (dict, i);
640 dict_delete_vars (dict, v, dict_get_var_cnt (dict) - nv);
/* Kind of MATCH FILES input; stored in the "type" member of each
   struct mtf_file. */
651 MTF_FILE, /* Specified on FILE= subcommand. */
652 MTF_TABLE /* Specified on TABLE= subcommand. */
655 /* One of the FILEs or TABLEs on MATCH FILES. */
/* cmd_match_files() treats a null "handle" as meaning the active file
   (see its BY error messages).  "in_name" is the name given on the IN
   subcommand, or null if there was none; "in_var" is set by
   create_flag_var(). */
658 struct ll ll; /* In list of all files and tables. */
663 const struct variable **by; /* List of BY variables for this file. */
664 struct mtf_variable *vars; /* Variables to copy to output. */
665 size_t var_cnt; /* Number of other variables. */
667 struct file_handle *handle; /* Input file handle. */
668 struct dictionary *dict; /* Input file dictionary. */
669 struct casereader *reader; /* Input reader. */
670 struct ccase input; /* Input record (null at end of file). */
673 char *in_name; /* Variable name. */
674 struct variable *in_var; /* Variable (in master dictionary). */
/* One variable copied by mtf_process_case(): "in_var" is read from the
   file's input case and "out_var" is written in the output case. */
679 struct variable *in_var;
680 struct variable *out_var;
683 /* MATCH FILES procedure. */
/* "nonempty_files" is incremented once per FILE subcommand and
   decremented by mtf_read_record(); cmd_match_files() keeps processing
   cases while it is positive. */
686 struct ll_list files; /* List of "struct mtf_file"s. */
687 int nonempty_files; /* FILEs that are not at end-of-file. */
689 bool ok; /* False if I/O error occurs. */
691 struct dictionary *dict; /* Dictionary of output file. */
692 struct casewriter *output; /* MATCH FILES output. */
694 size_t by_cnt; /* Number of variables on BY subcommand. */
697 Only if "first" or "last" is nonnull are the remaining
699 struct variable *first; /* Variable specified on FIRST (if any). */
700 struct variable *last; /* Variable specified on LAST (if any). */
701 struct ccase buffered_case; /* Case ready for output except that we don't
702 know the value for the LAST variable yet. */
703 struct ccase prev_BY_case; /* Case with values of last set of BY vars. */
704 const struct variable **prev_BY; /* Last set of BY variables. */
/* Forward declarations of the MATCH FILES helpers defined below. */
707 static void mtf_free (struct mtf_proc *);
709 static bool mtf_close_all_files (struct mtf_proc *);
710 static bool mtf_merge_dictionary (struct dictionary *const, struct mtf_file *);
711 static bool mtf_read_record (struct mtf_proc *mtf, struct mtf_file *);
713 static void mtf_process_case (struct mtf_proc *);
715 static bool create_flag_var (const char *subcommand_name, const char *var_name,
716 struct dictionary *, struct variable **);
717 static char *var_type_description (struct variable *);
719 /* Parse and execute the MATCH FILES command. */
/* Visible steps:

   1. Each FILE or TABLE subcommand allocates a struct mtf_file.  FILEs
      are inserted at FIRST_TABLE and TABLEs are pushed onto the tail of
      mtf.files (apparently keeping every FILE ahead of every TABLE).
      An asterisk selects the active file, whose dictionary is cloned;
      otherwise a file handle is parsed and opened with
      any_reader_open().  Per-file RENAME and IN subcommands follow, and
      the file's dictionary is merged into mtf.dict.

   2. BY, FIRST, LAST, MAP, DROP and KEEP are parsed until the end of
      the command.  BY variables are looked up by name in every input
      dictionary.

   3. A table mapping each input variable to its output variable is
      built per file, and the IN, FIRST and LAST flag variables are
      created in mtf.dict.

   4. Output goes to an autopaging writer.  Inputs that have no reader
      yet use proc_open(), or a clone of that reader for the second and
      later uses.

   5. Cases are merged by mtf_process_case() while any FILE still has
      cases, and the result becomes the new active file.

   Returns CMD_SUCCESS if taint_destroy() reports success, otherwise
   CMD_CASCADING_FAILURE. */
721 cmd_match_files (struct lexer *lexer, struct dataset *ds)
724 struct ll *first_table;
725 struct mtf_file *file, *next;
728 struct casereader *active_file = NULL;
730 char first_name[LONG_NAME_LEN + 1] = "";
731 char last_name[LONG_NAME_LEN + 1] = "";
733 struct taint *taint = NULL;
737 ll_init (&mtf.files);
738 mtf.nonempty_files = 0;
739 first_table = ll_null (&mtf.files);
740 mtf.dict = dict_create ();
743 mtf.first = mtf.last = NULL;
744 case_nullify (&mtf.buffered_case);
745 case_nullify (&mtf.prev_BY_case);
748 dict_set_case_limit (mtf.dict, dict_get_case_limit (dataset_dict (ds)));
750 lex_match (lexer, '/');
751 while (lex_token (lexer) == T_ID
752 && (lex_id_match (ss_cstr ("FILE"), ss_cstr (lex_tokid (lexer)))
753 || lex_id_match (ss_cstr ("TABLE"), ss_cstr (lex_tokid (lexer)))))
755 struct mtf_file *file = xmalloc (sizeof *file);
760 file->in_name = NULL;
764 case_nullify (&file->input);
766 if (lex_match_id (lexer, "FILE"))
768 file->type = MTF_FILE;
769 ll_insert (first_table, &file->ll);
770 mtf.nonempty_files++;
772 else if (lex_match_id (lexer, "TABLE"))
774 file->type = MTF_TABLE;
775 ll_push_tail (&mtf.files, &file->ll);
776 if (first_table == ll_null (&mtf.files))
777 first_table = &file->ll;
781 lex_match (lexer, '=');
783 if (lex_match (lexer, '*'))
785 if (!proc_has_active_file (ds))
787 msg (SE, _("Cannot specify the active file since no active "
788 "file has been defined."));
792 if (proc_make_temporary_transformations_permanent (ds))
794 _("MATCH FILES may not be used after TEMPORARY when "
795 "the active file is an input source. "
796 "Temporary transformations will be made permanent."));
798 file->dict = dict_clone (dataset_dict (ds));
802 file->handle = fh_parse (lexer, FH_REF_FILE | FH_REF_SCRATCH);
803 if (file->handle == NULL)
806 file->reader = any_reader_open (file->handle, &file->dict);
807 if (file->reader == NULL)
811 while (lex_match (lexer, '/'))
812 if (lex_match_id (lexer, "RENAME"))
814 if (!rename_variables (lexer, file->dict))
817 else if (lex_match_id (lexer, "IN"))
819 lex_match (lexer, '=');
820 if (lex_token (lexer) != T_ID)
822 lex_error (lexer, NULL);
826 if (file->in_name != NULL)
828 msg (SE, _("Multiple IN subcommands for a single FILE or "
832 file->in_name = xstrdup (lex_tokid (lexer));
/* NOTE(review): mtf_merge_dictionary() is declared to return bool, but
   its result is not examined here, so parsing continues after it
   reports a type or width conflict -- confirm whether that is
   intended. */
837 mtf_merge_dictionary (mtf.dict, file);
840 while (lex_token (lexer) != '.')
842 if (lex_match (lexer, T_BY))
844 struct mtf_file *file;
845 struct variable **by;
850 lex_sbc_only_once ("BY");
854 lex_match (lexer, '=');
855 if (!parse_variables (lexer, mtf.dict, &by, &mtf.by_cnt,
856 PV_NO_DUPLICATE | PV_NO_SCRATCH))
860 ll_for_each (file, struct mtf_file, ll, &mtf.files)
864 file->by = xnmalloc (mtf.by_cnt, sizeof *file->by);
865 for (i = 0; i < mtf.by_cnt; i++)
867 const char *var_name = var_get_name (by[i]);
868 file->by[i] = dict_lookup_var (file->dict, var_name);
869 if (file->by[i] == NULL)
871 if (file->handle != NULL)
872 msg (SE, _("File %s lacks BY variable %s."),
873 fh_get_name (file->handle), var_name);
875 msg (SE, _("Active file lacks BY variable %s."),
886 else if (lex_match_id (lexer, "FIRST"))
888 if (first_name[0] != '\0')
890 lex_sbc_only_once ("FIRST");
894 lex_match (lexer, '=');
895 if (!lex_force_id (lexer))
897 strcpy (first_name, lex_tokid (lexer));
900 else if (lex_match_id (lexer, "LAST"))
902 if (last_name[0] != '\0')
904 lex_sbc_only_once ("LAST");
908 lex_match (lexer, '=');
909 if (!lex_force_id (lexer))
911 strcpy (last_name, lex_tokid (lexer));
914 else if (lex_match_id (lexer, "MAP"))
918 else if (lex_match_id (lexer, "DROP"))
920 if (!drop_variables (lexer, mtf.dict))
923 else if (lex_match_id (lexer, "KEEP"))
925 if (!keep_variables (lexer, mtf.dict))
930 lex_error (lexer, NULL);
934 if (!lex_match (lexer, '/') && lex_token (lexer) != '.')
936 lex_end_of_command (lexer);
943 if (first_table != ll_null (&mtf.files))
945 msg (SE, _("BY is required when TABLE is specified."));
950 msg (SE, _("BY is required when IN is specified."));
955 /* Set up mapping from each file's variables to master
957 ll_for_each (file, struct mtf_file, ll, &mtf.files)
959 size_t in_var_cnt = dict_get_var_cnt (file->dict);
961 file->vars = xnmalloc (in_var_cnt, sizeof *file->vars);
963 for (i = 0; i < in_var_cnt; i++)
965 struct variable *in_var = dict_get_var (file->dict, i);
966 struct variable *out_var = dict_lookup_var (mtf.dict,
967 var_get_name (in_var));
971 struct mtf_variable *mv = &file->vars[file->var_cnt++];
973 mv->out_var = out_var;
978 /* Add IN, FIRST, and LAST variables to master dictionary. */
979 ll_for_each (file, struct mtf_file, ll, &mtf.files)
980 if (!create_flag_var ("IN", file->in_name, mtf.dict, &file->in_var))
982 if (!create_flag_var ("FIRST", first_name, mtf.dict, &mtf.first)
983 || !create_flag_var ("LAST", last_name, mtf.dict, &mtf.last))
986 dict_compact_values (mtf.dict);
987 mtf.output = autopaging_writer_create (dict_get_next_value_idx (mtf.dict));
988 taint = taint_clone (casewriter_get_taint (mtf.output));
990 ll_for_each (file, struct mtf_file, ll, &mtf.files)
992 if (file->reader == NULL)
994 if (active_file == NULL)
996 proc_discard_output (ds);
997 file->reader = active_file = proc_open (ds);
1000 file->reader = casereader_clone (active_file);
1002 taint_propagate (casereader_get_taint (file->reader), taint);
1005 ll_for_each_safe (file, next, struct mtf_file, ll, &mtf.files)
1006 mtf_read_record (&mtf, file);
1007 while (mtf.nonempty_files > 0)
1008 mtf_process_case (&mtf);
/* Flush the case that was held back while waiting to learn the value
   of the LAST flag; as the final case it always gets LAST = 1. */
1009 if ((mtf.first != NULL || mtf.last != NULL) && mtf.prev_BY != NULL)
1011 if (mtf.last != NULL)
1012 case_data_rw (&mtf.buffered_case, mtf.last)->f = 1.0;
1013 casewriter_write (mtf.output, &mtf.buffered_case);
1014 case_nullify (&mtf.buffered_case);
1016 mtf_close_all_files (&mtf);
1017 if (active_file != NULL)
1020 proc_set_active_file (ds, casewriter_make_reader (mtf.output), mtf.dict);
1026 return taint_destroy (taint) ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
1029 if (active_file != NULL)
1032 taint_destroy (taint);
1033 return CMD_CASCADING_FAILURE;
/* Creates the flag variable named on an IN, FIRST or LAST subcommand.

   SUBCOMMAND is the subcommand keyword, used only in the diagnostic.
   VAR_NAME is the requested variable name; nothing is created when it
   is null or empty.  Otherwise the variable is created in DICT with
   dict_create_var(), stored in *VAR, and given the F1.0 format for both
   print and write.

   The diagnostic's format string names the variable first and the
   subcommand second, so the arguments are passed in that order. */
1036 /* If VAR_NAME is a nonnull pointer to a non-empty string,
1037 attempts to create a variable named VAR_NAME, with format
1038 F1.0, in DICT, and stores a pointer to the variable in *VAR.
1039 Returns true if successful, false if the variable name is a
1040 duplicate (in which case a message saying that the variable
1041 specified on the given SUBCOMMAND is a duplicate is emitted).
1042 Also returns true, without doing anything, if VAR_NAME is null
1045 create_flag_var (const char *subcommand, const char *var_name,
1046 struct dictionary *dict, struct variable **var)
1048 if (var_name != NULL && var_name[0] != '\0')
1050 struct fmt_spec format = fmt_for_output (FMT_F, 1, 0);
1051 *var = dict_create_var (dict, var_name, 0);
1054 msg (SE, _("Variable name %s specified on %s subcommand "
1055 "duplicates an existing variable name."),
1056 var_name, subcommand);
1059 var_set_both_formats (*var, &format);
/* Returns a newly allocated string: "numeric" for a numeric variable,
   otherwise "string with width N" where N is the variable's width.
   mtf_merge_dictionary() frees the result with free(). */
1066 /* Return a string in an allocated buffer describing V's variable
1069 var_type_description (struct variable *v)
1071 if (var_is_numeric (v))
1072 return xstrdup ("numeric");
1074 return xasprintf ("string with width %d", var_get_width (v));
1077 /* Closes all the files in MTF and frees their associated data.
1078 Returns true if successful, false if an I/O error occurred on
1079 any of the files. */
/* Each file is unlinked from MTF's list as it is visited
   (ll_for_each_preremove); its reader, dictionary, IN variable name and
   pending input case are then released. */
1081 mtf_close_all_files (struct mtf_proc *mtf)
1083 struct mtf_file *file;
1086 ll_for_each_preremove (file, struct mtf_file, ll, &mtf->files)
1088 casereader_destroy (file->reader);
1090 dict_destroy (file->dict);
1091 free (file->in_name);
1092 case_destroy (&file->input);
1100 /* Frees all the data for the MATCH FILES procedure. */
/* Closes every input file, then destroys the output dictionary, the
   output writer and the two buffered cases. */
1102 mtf_free (struct mtf_proc *mtf)
1104 mtf_close_all_files (mtf);
1105 dict_destroy (mtf->dict);
1106 casewriter_destroy (mtf->output);
1107 case_destroy (&mtf->buffered_case);
1108 case_destroy (&mtf->prev_BY_case);
1111 /* Reads the next record into FILE, if possible, and update MTF's
1112 nonempty_files count if not. */
/* The previous input case is destroyed before reading.  The bool result
   is used by mtf_process_case() as a loop condition, so it presumably
   reports whether a record was read -- the return statements are not
   visible here. */
1114 mtf_read_record (struct mtf_proc *mtf, struct mtf_file *file)
1116 case_destroy (&file->input);
1117 if (!casereader_read (file->reader, &file->input))
1119 mtf->nonempty_files--;
1126 /* Compare the BY variables for files A and B; return -1 if A <
1127 B, 0 if A == B, 1 if A > B. (If there are no BY variables,
1128 then all records are equal.) */
/* The comparison is done by case_compare_2dict() on the current input
   cases of A and B, using each file's own BY variable list and MTF's
   BY count. */
1130 mtf_compare_BY_values (struct mtf_proc *mtf,
1131 struct mtf_file *a, struct mtf_file *b)
1133 return case_compare_2dict (&a->input, &b->input, a->by, b->by, mtf->by_cnt);
1136 /* Processes input files and write one case to the output file. */
/* Visible steps:

   1. Mark each file's "sequence" member: files whose input case is null
      get -1, FILEs are compared against the current minimum, and the
      remaining files are advanced with mtf_read_record() while the
      minimum compares greater than them.

   2. Build output case C: every output variable starts out missing,
      then the files are visited in reverse list order and their values
      copied in.  Each file's IN variable, if any, is set to whether the
      file's sequence equals MIN_SEQUENCE.

   3. Write C.  Without FIRST and LAST it is written immediately.
      Otherwise the previously buffered case is written once the LAST
      value is known, and C becomes the new buffered case.

   4. Read the next record from every FILE whose sequence equals
      MIN_SEQUENCE. */
1138 mtf_process_case (struct mtf_proc *mtf)
1141 struct mtf_file *min;
1142 struct mtf_file *file;
1146 /* Find the set of one or more FILEs whose BY values are
1147 minimal, as well as the set of zero or more TABLEs whose BY
1148 values equal those of the minimum FILEs.
1150 After each iteration of the loop, this invariant holds: the
1151 FILEs with minimum BY values thus far have "sequence"
1152 members equal to min_sequence, and "min" points to one of
1153 the mtf_files whose case has those minimum BY values, and
1154 similarly for TABLEs. */
1157 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1158 if (case_is_null (&file->input))
1159 file->sequence = -1;
1160 else if (file->type == MTF_FILE)
1162 int cmp = min != NULL ? mtf_compare_BY_values (mtf, min, file) : 1;
1164 file->sequence = cmp < 0 ? -1 : min_sequence;
1167 file->sequence = ++min_sequence;
1174 assert (min != NULL);
1177 cmp = mtf_compare_BY_values (mtf, min, file);
1179 while (cmp > 0 && mtf_read_record (mtf, file));
1180 file->sequence = cmp == 0 ? min_sequence : -1;
1183 /* Form the output case from the input cases. */
1184 case_create (&c, dict_get_next_value_idx (mtf->dict));
1185 for (i = 0; i < dict_get_var_cnt (mtf->dict); i++)
1187 struct variable *v = dict_get_var (mtf->dict, i);
1188 value_set_missing (case_data_rw (&c, v), var_get_width (v));
1190 ll_for_each_reverse (file, struct mtf_file, ll, &mtf->files)
1192 bool include_file = file->sequence == min_sequence;
1194 for (i = 0; i < file->var_cnt; i++)
1196 const struct mtf_variable *mv = &file->vars[i];
1197 const union value *in = case_data (&file->input, mv->in_var);
1198 union value *out = case_data_rw (&c, mv->out_var);
1199 value_copy (out, in, var_get_width (mv->in_var));
1201 if (file->in_var != NULL)
1202 case_data_rw (&c, file->in_var)->f = include_file;
1205 /* Write the output case. */
1206 if (mtf->first == NULL && mtf->last == NULL)
1208 /* With no FIRST or LAST variables, it's trivial. */
1209 casewriter_write (mtf->output, &c);
1213 /* It's harder with LAST, because we can't know whether
1214 this case is the last in a group until we've prepared
1215 the *next* case also. Thus, we buffer the previous
1216 output case until the next one is ready.
1218 We also have to save a copy of one of the previous input
1219 cases, so that we can compare the BY variables. We
1220 can't compare the BY variables between the current
1221 output case and the saved one because the BY variables
1222 might not be in the output (the user is allowed to drop
1225 if (mtf->prev_BY != NULL)
1227 new_BY = case_compare_2dict (&min->input, &mtf->prev_BY_case,
1228 min->by, mtf->prev_BY,
1230 if (mtf->last != NULL)
1231 case_data_rw (&mtf->buffered_case, mtf->last)->f = new_BY;
1232 casewriter_write (mtf->output, &mtf->buffered_case);
1237 case_move (&mtf->buffered_case, &c);
1238 if (mtf->first != NULL)
1239 case_data_rw (&mtf->buffered_case, mtf->first)->f = new_BY;
1243 mtf->prev_BY = min->by;
1244 case_destroy (&mtf->prev_BY_case);
1245 case_clone (&mtf->prev_BY_case, &min->input);
1249 /* Read another record from each input file FILE with minimum
1251 ll_for_each (file, struct mtf_file, ll, &mtf->files)
1252 if (file->type == MTF_FILE)
1254 if (file->sequence == min_sequence)
1255 mtf_read_record (mtf, file);
1261 /* Merge the dictionary for file F into master dictionary M. */
/* Visible behavior:

   - M takes the file label of F's dictionary if M has none.
   - F's documents become M's documents, or are appended to them with
     xasprintf() when M already has some.
   - For each variable in F's dictionary, a variable with a scratch-class
     name is tested for first.  A variable whose name already exists in
     M with a different width produces an error message.  With equal
     widths, value labels and missing values are copied to the master
     variable when it lacks them, and likewise the variable label.
     dict_clone_var_assert() adds the variable to M (presumably only
     when M has no variable of that name -- the selecting branch is not
     visible here).

   F->handle is null when F is the active file (cmd_match_files() tests
   for that in its BY diagnostics), so the conflict message substitutes
   "*" for the file name instead of passing a null handle to
   fh_get_name(). */
1263 mtf_merge_dictionary (struct dictionary *const m, struct mtf_file *f)
1265 struct dictionary *d = f->dict;
1266 const char *d_docs, *m_docs;
1269 if (dict_get_label (m) == NULL)
1270 dict_set_label (m, dict_get_label (d));
1272 d_docs = dict_get_documents (d);
1273 m_docs = dict_get_documents (m);
1277 dict_set_documents (m, d_docs);
1280 char *new_docs = xasprintf ("%s%s", m_docs, d_docs);
1281 dict_set_documents (m, new_docs);
1286 for (i = 0; i < dict_get_var_cnt (d); i++)
1288 struct variable *dv = dict_get_var (d, i);
1289 struct variable *mv = dict_lookup_var (m, var_get_name (dv));
1291 if (dict_class_from_id (var_get_name (dv)) == DC_SCRATCH)
1296 if (var_get_width (mv) != var_get_width (dv))
1298 char *dv_description = var_type_description (dv);
1299 char *mv_description = var_type_description (mv);
1300 msg (SE, _("Variable %s in file %s (%s) has different "
1301 "type or width from the same variable in "
1302 "earlier file (%s)."),
1303 var_get_name (dv), f->handle != NULL ? fh_get_name (f->handle) : "*",
1304 dv_description, mv_description);
1305 free (dv_description);
1306 free (mv_description);
1310 if (var_get_width (dv) == var_get_width (mv))
1312 if (var_has_value_labels (dv) && !var_has_value_labels (mv))
1313 var_set_value_labels (mv, var_get_value_labels (dv));
1314 if (var_has_missing_values (dv) && !var_has_missing_values (mv))
1315 var_set_missing_values (mv, var_get_missing_values (dv));
1318 if (var_get_label (dv) && !var_get_label (mv))
1319 var_set_label (mv, var_get_label (dv));
1322 mv = dict_clone_var_assert (m, dv, var_get_name (dv));
/* Members of struct case_map.  finish_case_map() fills "map" with -1
   for destination indexes that have no source and later trims trailing
   -1 entries by reducing "value_cnt". */
1330 A case map copies data from a case that corresponds for one
1331 dictionary to a case that corresponds to a second dictionary
1332 derived from the first by, optionally, deleting, reordering,
1333 or renaming variables. (No new variables may be created.)
1339 size_t value_cnt; /* Number of values in map. */
1340 int *map; /* For each destination index, the
1341 corresponding source index. */
1344 /* Prepares dictionary D for producing a case map. Afterward,
1345 the caller may delete, reorder, or rename variables within D
1346 at will before using finish_case_map() to produce the case
1349 Uses D's aux members, which must otherwise not be in use. */
/* For every variable, the current case index is stored in a
   heap-allocated int attached as the variable's aux data, with
   var_dtor_free registered as its destructor. */
1351 start_case_map (struct dictionary *d)
1353 size_t var_cnt = dict_get_var_cnt (d);
1356 for (i = 0; i < var_cnt; i++)
1358 struct variable *v = dict_get_var (d, i);
1359 int *src_fv = xmalloc (sizeof *src_fv);
1360 *src_fv = var_get_case_index (v);
1361 var_attach_aux (v, src_fv, var_dtor_free);
1365 /* Produces a case map from dictionary D, which must have been
1366 previously prepared with start_case_map().
1368 Does not retain any reference to D, and clears the aux members
1369 set up by start_case_map().
1371 Returns the new case map, or a null pointer if no mapping is
1372 required (that is, no data has changed position). */
/* The map has one entry per value in D.  For each variable, the source
   index saved by start_case_map() is detached and every value of the
   variable is mapped from its old position to its new one; the assert
   checks that each destination slot is assigned only once.  The map is
   destroyed again on the no-mapping-required path, and trailing entries
   still equal to -1 are trimmed by reducing value_cnt. */
1373 static struct case_map *
1374 finish_case_map (struct dictionary *d)
1376 struct case_map *map;
1377 size_t var_cnt = dict_get_var_cnt (d);
1381 map = xmalloc (sizeof *map);
1382 map->value_cnt = dict_get_next_value_idx (d);
1383 map->map = xnmalloc (map->value_cnt, sizeof *map->map);
1384 for (i = 0; i < map->value_cnt; i++)
1388 for (i = 0; i < var_cnt; i++)
1390 struct variable *v = dict_get_var (d, i);
1391 size_t value_cnt = var_get_value_cnt (v);
1392 int *src_fv = (int *) var_detach_aux (v);
1395 if (var_get_case_index (v) != *src_fv)
1398 for (idx = 0; idx < value_cnt; idx++)
1400 int src_idx = *src_fv + idx;
1401 int dst_idx = var_get_case_index (v) + idx;
1403 assert (map->map[dst_idx] == -1);
1404 map->map[dst_idx] = src_idx;
1411 destroy_case_map (map);
1415 while (map->value_cnt > 0 && map->map[map->value_cnt - 1] == -1)
1421 /* Maps from SRC to DST, applying case map MAP. */
/* DST is created here with MAP's value count.  Each destination value
   is copied from the source index recorded in MAP for that position. */
1423 map_case (const struct case_map *map,
1424 const struct ccase *src, struct ccase *dst)
1428 case_create (dst, map->value_cnt);
1429 for (dst_idx = 0; dst_idx < map->value_cnt; dst_idx++)
1431 int src_idx = map->map[dst_idx];
1433 *case_data_rw_idx (dst, dst_idx) = *case_data_idx (src, src_idx);
1437 /* Destroys case map MAP. */
/* NOTE(review): parse_write_command() can reach its
   destroy_case_map (map) call with whatever finish_case_map() returned,
   which may be null -- confirm that a null MAP is accepted here (the
   body is not visible). */
1439 destroy_case_map (struct case_map *map)
/* Accessor for MAP's value_cnt member; parse_write_command() passes the
   result to casewriter_create_translator(). */
1448 /* Returns the number of `union value's in cases created by
1451 case_map_get_value_cnt (const struct case_map *map)
1453 return map->value_cnt;